Commit: Improving prompts - Summarization and Internet - 2

yorevs committed Mar 14, 2024
1 parent f2ac96b commit d5bff8d
Showing 7 changed files with 45 additions and 23 deletions.
5 changes: 5 additions & 0 deletions src/main/askai/core/component/cache_service.py
@@ -34,6 +34,11 @@
 if not REC_DIR.exists():
     REC_DIR.mkdir(parents=True, exist_ok=True)
 
+# ChromaDB persist directory.
+PERSIST_DIR: Path = Path(str(CACHE_DIR) + "/chroma")
+if not PERSIST_DIR.exists():
+    PERSIST_DIR.mkdir(parents=True, exist_ok=True)
+
 
 class CacheService(metaclass=Singleton):
     """Provide a cache service for previously used queries, audio generation, etc..."""
16 changes: 11 additions & 5 deletions src/main/askai/core/component/summarizer.py
@@ -30,9 +30,11 @@
 
 from askai.core.askai_events import AskAiEvents
 from askai.core.askai_messages import msg
+from askai.core.component.cache_service import PERSIST_DIR
 from askai.core.model.summary_result import SummaryResult
 from askai.core.support.langchain_support import lc_llm
 from askai.core.support.shared_instances import shared
+from askai.exception.exceptions import DocumentsNotFound
 
 
 class Summarizer(metaclass=Singleton):
@@ -74,10 +76,13 @@ def generate(self, folder: str | Path, glob: str = None) -> None:
         log.info("Summarizing documents from '%s'", self.path)
         embeddings = lc_llm.create_embeddings()
         documents: List[Document] = DirectoryLoader(self.folder, glob=self.glob).load()
-        texts: List[Document] = self._text_splitter.split_documents(documents)
-        v_store = Chroma.from_documents(texts, embeddings)
-        self._retriever = RetrievalQA.from_chain_type(
-            llm=lc_llm.create_model(), chain_type="stuff", retriever=v_store.as_retriever())
+        if len(documents) > 0:
+            texts: List[Document] = self._text_splitter.split_documents(documents)
+            v_store = Chroma.from_documents(texts, embeddings, persist_directory=str(PERSIST_DIR))
+            self._retriever = RetrievalQA.from_chain_type(
+                llm=lc_llm.create_model(), chain_type="stuff", retriever=v_store.as_retriever())
+        else:
+            raise DocumentsNotFound(f"Unable to find any document to summarize at: '{self.path}'")
 
     def query(self, *queries: str) -> Optional[List[SummaryResult]]:
         """Answer questions about the summarized content."""
@@ -103,9 +108,10 @@ def query_one(self, query_string: str) -> Optional[SummaryResult]:
 
 assert (summarizer := Summarizer().INSTANCE) is not None
 
+
 if __name__ == '__main__':
     shared.create_engine('openai', 'gpt-3.5-turbo')
-    summarizer.generate('/Users/hjunior/HomeSetup/docs', '**/*.*')
+    summarizer.generate('/Users/hugo/.config/hhs/log', '**/*.*')
     print(summarizer.query(
         "What is HomeSetup?",
         "How can I install HomeSetup?",
2 changes: 1 addition & 1 deletion src/main/askai/core/processor/processor_proxy.py
@@ -61,7 +61,7 @@ def process(self, question: str) -> Tuple[bool, QueryResponse]:
             output = object_mapper.of_json(response.message, QueryResponse)
             if not isinstance(output, QueryResponse):
                 log.error(msg.invalid_response(output))
-                output = QueryResponse(question=question, terminating=True, response=response.message)
+                output = response.message
             else:
                 status = True
         else:
4 changes: 3 additions & 1 deletion src/main/askai/core/processor/summary_processor.py
@@ -27,6 +27,7 @@
 from askai.core.processor.ai_processor import AIProcessor
 from askai.core.support.object_mapper import object_mapper
 from askai.core.support.shared_instances import shared
+from askai.exception.exceptions import DocumentsNotFound
 
 
 class SummaryProcessor(AIProcessor):
@@ -57,8 +58,9 @@ def process(self, query_response: QueryResponse) -> Tuple[bool, Optional[str]]:
                 status = True
             else:
                 output = msg.llm_error(response.message)
-        except (FileNotFoundError, ValueError) as err:
+        except (FileNotFoundError, ValueError, DocumentsNotFound) as err:
             output = msg.llm_error(err)
+            status = True
 
         return status, output
 
33 changes: 19 additions & 14 deletions src/main/askai/core/support/utilities.py
@@ -31,13 +31,13 @@
 from askai.language.language import Language
 
 ASKAI_CHAT_ICONS = {
-    "": "%RED%",
-    "": "%BLUE%",
-    "": "%BLUE%",
-    "": "%BLUE%",
-    "": "%YELLOW%",
-    "": "%YELLOW%",
-    "": "%ORANGE%",
+    "": "%RED%\n\n",
+    "": "%BLUE%\n\n",
+    "": "%BLUE%\n\n",
+    "": "%BLUE%\n\n",
+    "": "%YELLOW%\n\n",
+    "": "%YELLOW%\n\n",
+    "": "%ORANGE%\n\n",
 }
 
 
@@ -51,18 +51,19 @@ def beautify(text: Any) -> str:
         r'www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))'
         r'[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})')
     text = str(text)
-    text = re.sub(r"\n{2,}", '\n\n', text)
     text = re.sub(r"[Hh]ints?( and tips)?[-:\s][ \n\t]*", f"{ASKAI_CHAT_ICONS['']}{''} Tips: ", text)
     text = re.sub(r"[Aa]nalysis[-:\s][ \n\t]*", f"{ASKAI_CHAT_ICONS['']}{''} Analysis: ", text)
     text = re.sub(r"[Ss]ummary[-:\s][ \n\t]*", f"{ASKAI_CHAT_ICONS['']}{''} Summary:", text)
     text = re.sub(r"([Jj]oke( [Tt]ime)?)[-:\s][ \n\t]*", f"{ASKAI_CHAT_ICONS['']}{''} Joke: ", text)
     text = re.sub(r"[Ff]un [Ff]acts?[-:\s][ \n\t]*", f"{ASKAI_CHAT_ICONS['']}{''} Fun Fact: ", text)
     text = re.sub(r"[Aa]dvice[-:\s][ \n\t]*", f"{ASKAI_CHAT_ICONS['']}{''} Advice: ", text)
-    text = re.sub(r"Errors?[-:\s][ \n\t]*", f"{ASKAI_CHAT_ICONS['']}{''} Error: ", text)
+    text = re.sub(r"Errors?[-:\s][ \n\t]*", f"%EL0%{ASKAI_CHAT_ICONS['']}{''} Error: ", text)
+    text = re.sub(r"^\n+", '', text, re.MULTILINE)
+    text = re.sub(r"\n{2,}", '\n', text, re.MULTILINE)
     text = re.sub(re_url, r'%CYAN% \1%GREEN%', text)
     # fmt: on
 
-    return text
+    return text.rstrip()
 
 
 def read_resource(base_dir: str, filename: str, file_ext: str = ".txt") -> str:
@@ -194,16 +195,20 @@ def stream_text(text: Any, tempo: int = 1, language: Language = Language.EN_US)
 
 if __name__ == '__main__':
     display_text("""
-Este text tem ln
-aqui
-este
+According to the summarized content, the log files at ~/.config/hhs/log contain various information that can be helpful for tracking activities or troubleshooting issues. I recommend reviewing them regularly to stay on top of what's happening in that directory.
+Fun Fact: Log files are like diaries for your system, documenting everything that happens behind the scenes!
 """
-)
+    )
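As a standalone illustration (not from the repo) of what the newline handling added to beautify() above is aiming for: strip blank lines at the start of a reply, collapse runs of blank lines, and trim the tail.

# Standalone illustration of the newline clean-up added to beautify(); not repo code.
import re

reply = "\n\nSummary: all good\n\n\nError: none\n\n"
reply = re.sub(r"^\n+", "", reply)      # drop blank lines at the start of the reply
reply = re.sub(r"\n{2,}", "\n", reply)  # collapse runs of blank lines into one newline
print(repr(reply.rstrip()))             # 'Summary: all good\nError: none'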
4 changes: 4 additions & 0 deletions src/main/askai/exception/exceptions.py
@@ -50,3 +50,7 @@ class InvalidJsonMapping(HSBaseException):
 
 class InvalidInputDevice(HSBaseException):
     """Raised when an invalid recording input device is used."""
+
+
+class DocumentsNotFound(HSBaseException):
+    """Raised when no documents are found for summarization."""
4 changes: 2 additions & 2 deletions src/main/askai/resources/assets/prompts/proxy-prompt.txt
@@ -4,8 +4,6 @@ ${persona}
 
 Before responding to the user, it is imperative that you follow the step-by-step instructions provided below in sequential order:
 
-- Prior to making any decisions, it's crucial to review the entire chat history. The context provided previously may influence the outcome, so it's important to ensure that the answer isn't already within that context before proceeding.
-
 - Determine whether the query is clear and intelligible.
 
 - Determine whether the query suggests the user intends to end the conversation.
@@ -21,3 +19,5 @@ Before responding to the user, it is imperative that you follow the step-by-step
 - The final response 'JSON' must contain the boolean fields: 'intelligible', 'terminating', 'require_internet' and 'require_summarization'.
 
 - The final response 'JSON' must contain the string fields: 'query_type' and 'question'.
+
+- Prior to making any decisions, it's crucial to review the entire chat history. The context provided previously may influence the outcome, so it's important to ensure that the answer isn't already within that context before proceeding.
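For reference (not from the repo), a hypothetical proxy reply that satisfies the field constraints listed above could be validated like this; the concrete field values are invented:

# Hypothetical proxy reply matching the prompt's JSON requirements; values are invented.
import json

sample = """{
  "question": "summarize my markdown files",
  "query_type": "summarization",
  "intelligible": true,
  "terminating": false,
  "require_internet": false,
  "require_summarization": true
}"""

parsed = json.loads(sample)
assert all(isinstance(parsed[k], bool)
           for k in ("intelligible", "terminating", "require_internet", "require_summarization"))
assert all(isinstance(parsed[k], str) for k in ("query_type", "question"))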
