Skip to content

Commit

Permalink
Improving prompts - Summarization and Internet - 3
Browse files Browse the repository at this point in the history
  • Loading branch information
yorevs committed Mar 14, 2024
1 parent d5bff8d commit c23a2ba
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 25 deletions.
4 changes: 2 additions & 2 deletions src/main/askai/core/askai.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,8 @@ def _process_response(self, proxy_response: QueryResponse) -> bool:
# Query processors
if processor or (q_type := proxy_response.query_type):
if not processor and not (processor := AIProcessor.get_by_query_type(q_type)):
log.error(f"Unable to find a proper processor for query type: {q_type}")
self.reply_error(str(proxy_response))
log.error(f"Unable to find a proper processor: {str(proxy_response)}")
self.reply_error(msg.no_processor(q_type))
return False
log.info("%s::Processing response for '%s'", processor, proxy_response.question)
status, output = processor.process(proxy_response)
Expand Down
4 changes: 4 additions & 0 deletions src/main/askai/core/askai_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ def invalid_cmd_format(self, output: str) -> str:

# Failures

@lru_cache
def no_processor(self, query_type: str) -> str:
    """Return the localized failure message for a query type that has no processor.
    :param query_type: The query type for which no processor could be found.
    """
    message = f"No suitable processor found for query type '{query_type}' !"
    return self.translate(message)

@lru_cache
def invalid_response(self, response_text: str) -> str:
    """Return the localized failure message for an invalid query response/type.
    :param response_text: The offending response text received from the engine.
    """
    message = f"Received an invalid query response/type '{response_text}' !"
    return self.translate(message)
Expand Down
22 changes: 15 additions & 7 deletions src/main/askai/core/component/internet_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,20 @@
from typing import Optional, List

from hspylib.core.metaclass.singleton import Singleton
from langchain.chains import load_summarize_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain_community.document_loaders.async_html import AsyncHtmlLoader
from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain_community.vectorstores.chroma import Chroma
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import Tool
from langchain_text_splitters import CharacterTextSplitter

from askai.core.askai_events import AskAiEvents
from askai.core.askai_messages import msg
from askai.core.component.cache_service import PERSIST_DIR
from askai.core.component.summarizer import summarizer
from askai.core.support.langchain_support import lc_llm, load_document


Expand All @@ -38,13 +41,18 @@ class InternetService(metaclass=Singleton):
ASKAI_INTERNET_DATA_KEY = "askai-internet-data"

@staticmethod
def scrap_sites(query: str, *sites: str) -> Optional[str]:
    """Scrape the given web sites and answer the query from their summarized contents.
    Loads each site as an HTML document, splits it into chunks, embeds the chunks into
    a Chroma vector store, and runs a "stuff" RetrievalQA chain over it.
    :param query: The question to answer from the scraped content.
    :param sites: The URLs of the sites to scrape.
    :return: The answer text, or None when no document could be loaded.
    """
    log.info("Scrapping sites: '%s'", str(sites))
    documents: List[Document] = load_document(AsyncHtmlLoader, list(sites))
    if documents:
        texts: List[Document] = summarizer.text_splitter.split_documents(documents)
        v_store = Chroma.from_documents(texts, lc_llm.create_embeddings(), persist_directory=str(PERSIST_DIR))
        # RetrievalQA.from_chain_type returns a QA chain, not a retriever — name it accordingly.
        qa_chain = RetrievalQA.from_chain_type(
            llm=lc_llm.create_model(), chain_type="stuff", retriever=v_store.as_retriever())
        search_results = qa_chain.invoke({"query": query})
        return search_results['result']
    return None

def __init__(self):
self._google = GoogleSearchAPIWrapper()
Expand Down
19 changes: 11 additions & 8 deletions src/main/askai/core/component/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores.chroma import Chroma
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter

from askai.core.askai_events import AskAiEvents
from askai.core.askai_messages import msg
Expand Down Expand Up @@ -64,6 +64,10 @@ def glob(self) -> str:
def path(self) -> str:
return f"{self.folder}{self.glob}"

@property
def text_splitter(self) -> TextSplitter:
    """The text splitter used to chunk documents before embedding."""
    return self._text_splitter

@lru_cache
def generate(self, folder: str | Path, glob: str = None) -> None:
"""Generate a summarization of the folder contents.
Expand All @@ -74,11 +78,10 @@ def generate(self, folder: str | Path, glob: str = None) -> None:
self._glob: str = glob.strip()
AskAiEvents.ASKAI_BUS.events.reply.emit(message=msg.summarizing(self.path))
log.info("Summarizing documents from '%s'", self.path)
embeddings = lc_llm.create_embeddings()
documents: List[Document] = DirectoryLoader(self.folder, glob=self.glob).load()
if len(documents) > 0:
texts: List[Document] = self._text_splitter.split_documents(documents)
v_store = Chroma.from_documents(texts, embeddings, persist_directory=str(PERSIST_DIR))
v_store = Chroma.from_documents(texts, lc_llm.create_embeddings(), persist_directory=str(PERSIST_DIR))
self._retriever = RetrievalQA.from_chain_type(
llm=lc_llm.create_model(), chain_type="stuff", retriever=v_store.as_retriever())
else:
def query(self, *queries: str) -> Optional[List[SummaryResult]]:
    """Query the AI about each of the given questions, based on the summarized content.
    :param queries: One or more questions to ask.
    :return: The list of successful results, or None when no summary has been generated yet.
    """
    check_argument(len(queries) > 0)
    if self._retriever is not None:
        results: List[SummaryResult] = []
        for query in queries:
            # Only queries that yielded a result are collected.
            if result := self.query_one(query):
                results.append(result)
        return results
    return None

@lru_cache
def query_one(self, query: str) -> Optional[SummaryResult]:
    """Query the AI about a given query based on the summarized content.
    :param query: The question to ask about the summarized documents.
    :return: A SummaryResult holding the query and its answer, or None when no result.
    """
    check_argument(len(query) > 0)
    if result := self._retriever.invoke({"query": query}):
        return SummaryResult(
            self._folder, self._glob, result['query'].strip(), result['result'].strip()
        )
    return None
Expand Down
1 change: 1 addition & 0 deletions src/main/askai/core/model/query_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class QueryResponse:
response: str = ""
terminating: bool = False
intelligible: bool = True
changing_subject: bool = False
require_internet: bool = False
require_summarization: bool = False
commands: List[TerminalCommand] = field(default_factory=list)
Expand Down
7 changes: 5 additions & 2 deletions src/main/askai/core/processor/internet_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
Copyright·(c)·2024,·HSPyLib
"""
import logging as log
from functools import partial
from typing import Optional, Tuple

from hspylib.core.zoned_datetime import now
Expand All @@ -37,7 +38,6 @@ def __init__(self):

def process(self, query_response: QueryResponse) -> Tuple[bool, Optional[str]]:
status = False
output = None
template = PromptTemplate(input_variables=['cur_date'], template=self.template())
final_prompt: str = msg.translate(template.format(cur_date=now('%Y-%d-%m')))
shared.context.set("SETUP", final_prompt, "system")
Expand All @@ -49,7 +49,10 @@ def process(self, query_response: QueryResponse) -> Tuple[bool, Optional[str]]:
if (response := shared.engine.ask(context, temperature=0.0, top_p=0.0)) and response.is_success:
search: SearchResult = object_mapper.of_json(response.message, SearchResult)
query = " + ".join(search.keywords)
if results := internet.search_google(query, *search.sites):
fc_call = partial(internet.scrap_sites, query) \
if query_response.require_summarization \
else partial(internet.search_google, query)
if results := fc_call(*search.sites):
search.results = results
output = self._wrap_output(query_response, search)
shared.context.set("INTERNET", output, "assistant")
Expand Down
2 changes: 1 addition & 1 deletion src/main/askai/core/support/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def beautify(text: Any) -> str:
text = re.sub(r"([Jj]oke( [Tt]ime)?)[-:\s][ \n\t]*", f"{ASKAI_CHAT_ICONS['']}{''} Joke: ", text)
text = re.sub(r"[Ff]un [Ff]acts?[-:\s][ \n\t]*", f"{ASKAI_CHAT_ICONS['']}{''} Fun Fact: ", text)
text = re.sub(r"[Aa]dvice[-:\s][ \n\t]*", f"{ASKAI_CHAT_ICONS['']}{''} Advice: ", text)
text = re.sub(r"Errors?[-:\s][ \n\t]*", f"%EL0%{ASKAI_CHAT_ICONS['']}{''} Error: ", text)
text = re.sub(r"Errors?[-:\s][ \n\t]*", f"%EL1%{ASKAI_CHAT_ICONS['']}{''} Error: ", text)
text = re.sub(r"^\n+", '', text, re.MULTILINE)
text = re.sub(r"\n{2,}", '\n', text, re.MULTILINE)
text = re.sub(re_url, r'%CYAN% \1%GREEN%', text)
Expand Down
8 changes: 3 additions & 5 deletions src/main/askai/resources/assets/prompts/proxy-prompt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,14 @@ Before responding to the user, it is imperative that you follow the step-by-step

- Determine whether the query suggests the user intends to end the conversation.

- Determine whether the query suggests the user intends to change the subject of the conversation.

- Determine whether real-time data are required for ensuring the utmost accuracy and relevance in responses. This pertains specifically to situations where the required information may be outdated or cannot be retrieved from the chat history or your existing database.

- Determine whether summarizing documents is necessary to provide an accurate and comprehensive response.
- Determine whether summarizing documents or web sites is necessary to provide an accurate and comprehensive response.

- Prior to making any decisions, it's crucial to review the entire chat history. The context provided previously may influence the outcome, so it's important to ensure that the answer isn't already within that context before proceeding.

- The final response is a formatted JSON with no additional description or context.

- The final response 'JSON' must contain the boolean fields: 'intelligible', 'terminating', 'require_internet' and 'require_summarization'.

- The final response 'JSON' must contain the string fields: 'query_type' and 'question'.

- Prior to making any decisions, it's crucial to review the entire chat history. The context provided previously may influence the outcome, so it's important to ensure that the answer isn't already within that context before proceeding.

0 comments on commit c23a2ba

Please sign in to comment.