Skip to content

Commit

Permalink
Change the way the proxy works regarding internet, summarization and …
Browse files Browse the repository at this point in the history
…general queries - 3
  • Loading branch information
Hugo Saporetti Junior committed Mar 27, 2024
1 parent 3e586ac commit 5b79baf
Show file tree
Hide file tree
Showing 11 changed files with 89 additions and 60 deletions.
2 changes: 1 addition & 1 deletion docs/devel/askai-questions.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Questions:

1. Summarize my markdown files at my HomeSetup docs folder.
2. What are the current weather conditions in Belo Horizonte, Brazil today?
2. What are the current weather conditions in San Francisco, U.S. today?
3. When is the upcoming Los Angeles Lakers match?
4. Who currently holds the office of President of the United States?
14 changes: 8 additions & 6 deletions src/main/askai/core/askai_messages.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from askai.core.askai_configs import configs
from askai.language.argos_translator import ArgosTranslator
from askai.language.language import Language
from functools import lru_cache

from hspylib.core.metaclass.singleton import Singleton
from hspylib.modules.application.exit_status import ExitStatus

from askai.core.askai_configs import configs
from askai.language.argos_translator import ArgosTranslator
from askai.language.language import Language


class AskAiMessages(metaclass=Singleton):
"""Provide access to static 'translated' messages."""
Expand Down Expand Up @@ -46,16 +48,16 @@ def goodbye(self) -> str:
return self.translate(" Goodbye, have a nice day !")

@lru_cache
def executing(self, cmd_line: str) -> str:
return self.translate(f"Executing command `{cmd_line}', please wait…")
def executing(self) -> str:
return self.translate(f"Executing command, please wait…")

@lru_cache
def cmd_success(self, exit_code: ExitStatus) -> str:
return self.translate(f"OK, command executed with {str(exit_code).lower()}")

@lru_cache
def searching(self) -> str:
return self.translate("Searching on Google…")
return self.translate(f"Searching Google…")

@lru_cache
def summarizing(self, path: str) -> str:
Expand Down
61 changes: 33 additions & 28 deletions src/main/askai/core/component/internet_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,10 @@
Copyright·(c)·2024,·HSPyLib
"""
import logging as log
import os
from typing import List, Optional

from askai.core.askai_events import AskAiEvents
from askai.core.askai_messages import msg
from askai.core.component.cache_service import PERSIST_DIR
from askai.core.component.summarizer import summarizer
from askai.core.support.langchain_support import lc_llm, load_document
from hspylib.core.metaclass.singleton import Singleton
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval_qa.base import RetrievalQA
Expand All @@ -29,9 +26,13 @@
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import Tool
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing import List, Optional

import logging as log
from askai.core.askai_events import AskAiEvents
from askai.core.askai_messages import msg
from askai.core.component.cache_service import PERSIST_DIR
from askai.core.component.summarizer import summarizer
from askai.core.model.search_result import SearchResult
from askai.core.support.langchain_support import lc_llm, load_document


class InternetService(metaclass=Singleton):
Expand All @@ -42,18 +43,22 @@ class InternetService(metaclass=Singleton):
ASKAI_INTERNET_DATA_KEY = "askai-internet-data"

@staticmethod
def scrap_sites(query: str, *sites: str) -> Optional[str]:
"""Scrap a web page and summarize it's contents."""
log.info("Scrapping sites: '%s'", str(sites))
documents: List[Document] = load_document(AsyncHtmlLoader, list(sites))
if len(documents) > 0:
texts: List[Document] = summarizer.text_splitter.split_documents(documents)
v_store = Chroma.from_documents(texts, lc_llm.create_embeddings(), persist_directory=str(PERSIST_DIR))
retriever = RetrievalQA.from_chain_type(
llm=lc_llm.create_model(), chain_type="stuff", retriever=v_store.as_retriever()
)
search_results = retriever.invoke({"query": query})
return search_results["result"]
def scrap_sites(search: SearchResult) -> Optional[str]:
"""Scrap a web page and summarize it's contents.
:param search: The AI search parameters.
"""
query = '+'.join(search.keywords)
if len(search.sites) > 0:
log.info("Scrapping sites: '%s'", str(', '.join(search.sites)))
documents: List[Document] = load_document(AsyncHtmlLoader, list(search.sites))
if len(documents) > 0:
texts: List[Document] = summarizer.text_splitter.split_documents(documents)
v_store = Chroma.from_documents(texts, lc_llm.create_embeddings(), persist_directory=str(PERSIST_DIR))
retriever = RetrievalQA.from_chain_type(
llm=lc_llm.create_model(), chain_type="stuff", retriever=v_store.as_retriever()
)
search_results = retriever.invoke({"query": query})
return search_results["result"]
return None

def __init__(self):
Expand All @@ -63,18 +68,18 @@ def __init__(self):
chunk_size=800, chunk_overlap=8, separators=[" ", ", ", os.linesep]
)

def search_google(self, query: str, *sites: str) -> Optional[str]:
def search_google(self, search: SearchResult) -> Optional[str]:
"""Search the web using google search API.
:param query: The google search query string.
:param sites: The sites you want google to search for.
:param search: The AI search parameters.
"""
AskAiEvents.ASKAI_BUS.events.reply.emit(message=msg.searching())
if len(sites) > 0:
log.info("Searching GOOGLE for '%s' url: '%s'", query, str(sites))
query = '+'.join(search.keywords)
if len(search.sites) > 0:
search_results: List[Document] = []
for url in sites:
content = str(self._tool.run(f"{query} site: {url}"))
search_results.append(Document(content))
websites: str = ' OR '.join(['site: ' + url for url in search.sites])
AskAiEvents.ASKAI_BUS.events.reply.emit(message=msg.searching())
log.info("Searching GOOGLE for '%s' url: '%s'", query, str(', '.join(search.sites)))
content = str(self._tool.run(f"{query} {websites}"))
search_results.append(Document(content))
prompt = ChatPromptTemplate.from_messages([("system", "{query}\n\n{context}")])
chain = create_stuff_documents_chain(lc_llm.create_chat_model(), prompt)
return chain.invoke({"query": query, "context": search_results})
Expand Down
10 changes: 5 additions & 5 deletions src/main/askai/core/model/search_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,20 @@
Copyright·(c)·2024,·HSPyLib
"""
import json
from dataclasses import dataclass
from typing import List

import json


@dataclass
class SearchResult:
"""Keep track of the internet search responses."""

question: str = None
datetime: str = None
keywords: List[str] = None
sites: str | List[str] = None
after: str = None
results: str = None
response: str = None

def __str__(self):
return f"Internet search results: {json.dumps(self.__dict__, default=lambda obj: obj.__dict__)}"
return f"Search Results: {json.dumps(self.__dict__, default=lambda obj: obj.__dict__)}"
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def _process_command(self, query_response: ProcessorResponse, cmd_line: str) ->
try:
if command and which(command):
cmd_line = expandvars(cmd_line.replace("~/", f"{os.getenv('HOME')}/").strip())
AskAiEvents.ASKAI_BUS.events.reply.emit(message=msg.executing(cmd_line))
AskAiEvents.ASKAI_BUS.events.reply.emit(message=msg.executing())
log.info("Executing command `%s'", cmd_line)
output, exit_code = Terminal.INSTANCE.shell_exec(cmd_line, shell=True)
if exit_code == ExitStatus.SUCCESS:
Expand Down
5 changes: 2 additions & 3 deletions src/main/askai/core/processor/instances/generic_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from functools import lru_cache
from typing import Optional, Tuple, List

from hspylib.core.zoned_datetime import now
from langchain_core.prompts import PromptTemplate

from askai.core.askai_messages import msg
Expand Down Expand Up @@ -69,8 +68,8 @@ def template(self) -> str:

def process(self, query_response: ProcessorResponse) -> Tuple[bool, Optional[str]]:
status = False
template = PromptTemplate(input_variables=["user", "cur_date"], template=self.template())
final_prompt: str = msg.translate(template.format(user=prompt.user, cur_date=now(shared.DATE_FMT)))
template = PromptTemplate(input_variables=["user", "datetime"], template=self.template())
final_prompt: str = msg.translate(template.format(user=prompt.user, datetime=shared.now))
shared.context.set("SETUP", final_prompt, "system")
shared.context.set("QUESTION", f"\n\nQuestion: {query_response.question}\n\nHelpful Answer:")
context: ContextRaw = shared.context.join("GENERAL", "SETUP", "QUESTION")
Expand Down
12 changes: 6 additions & 6 deletions src/main/askai/core/processor/instances/internet_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from functools import lru_cache
from typing import Optional, Tuple, List

from hspylib.core.zoned_datetime import now
from langchain_core.prompts import PromptTemplate

from askai.core.askai_messages import msg
Expand Down Expand Up @@ -60,8 +59,8 @@ def template(self) -> str:

def process(self, query_response: ProcessorResponse) -> Tuple[bool, Optional[str]]:
status = False
template = PromptTemplate(input_variables=["cur_date"], template=self.template())
final_prompt: str = msg.translate(template.format(cur_date=now(self.DATE_FMT)))
template = PromptTemplate(input_variables=["locale", "datetime"], template=self.template())
final_prompt: str = msg.translate(template.format(locale=shared.idiom, datetime=shared.now))
shared.context.set("SETUP", final_prompt, "system")
shared.context.set("QUESTION", f"\n\nQuestion: {query_response.question}\n\nHelpful Answer:")
context: ContextRaw = shared.context.join("SETUP", "QUESTION")
Expand All @@ -74,10 +73,11 @@ def process(self, query_response: ProcessorResponse) -> Tuple[bool, Optional[str
log.error(msg.invalid_response(search))
output = response.message.strip()
else:
query = " + ".join(search.keywords)
if output := internet.search_google(query, *search.sites):
shared.context.set("CONTEXT", output, "assistant")
if output := internet.search_google(search):
shared.context.push("GENERAL", query_response.question)
shared.context.push("GENERAL", output, "assistant")
cache.save_reply(query_response.question, output)
cache.save_query_history()
else:
output = msg.search_empty()
status = True
Expand Down
11 changes: 11 additions & 0 deletions src/main/askai/core/support/shared_instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
from clitt.core.tui.line_input.line_input import line_input
from hspylib.core.metaclass.singleton import Singleton
from hspylib.core.preconditions import check_state
from hspylib.core.zoned_datetime import now
from hspylib.modules.cli.keyboard import Keyboard

from askai.core.askai_configs import configs
from askai.core.askai_prompt import prompt
from askai.core.engine.ai_engine import AIEngine
from askai.core.engine.engine_factory import EngineFactory
Expand All @@ -30,6 +32,7 @@ class SharedInstances(metaclass=Singleton):
def __init__(self) -> None:
self._engine: Optional[AIEngine] = None
self._context: Optional[ChatContext] = None
self._idiom: str = configs.language.idiom

@property
def engine(self) -> Optional[AIEngine]:
Expand All @@ -49,6 +52,10 @@ def context(self, value: ChatContext) -> None:
check_state(self._context is None, "Once set, this instance is immutable.")
self._context = value

@property
def idiom(self) -> str:
return self._idiom

@property
def nickname(self) -> str:
return f"%GREEN% {self.engine.nickname()}%NC%"
Expand All @@ -57,6 +64,10 @@ def nickname(self) -> str:
def username(self) -> str:
return f"%WHITE% {prompt.user.title()}%NC%"

@property
def now(self) -> str:
return now(self.DATE_FMT)

def create_engine(self, engine_name: str, model_name: str) -> AIEngine:
"""TODO"""
if self._engine is None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ Before responding to the user, it is imperative that you follow the step-by-step

"WORKFLOW 2":

1. Respond immediately to the user with the provided code: "bde6f44d-c1a0-4b0c-bd74-8278e468e50c". Do not include any additional information. This is the ultimate reply.
1. Respond immediately to the user with the provided code: "bde6f44d-c1a0-4b0c-bd74-8278e468e50c". Do not include any additional information, description, or context. Do not try to make up an answer.
4 changes: 2 additions & 2 deletions src/main/askai/resources/assets/prompts/generic-prompt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Before responding to the user, it is imperative that you follow the step-by-step

2. Determine if you can craft your response using solely the information available in the conversation history.

3. Determine if you can craft your response from your database. Remember that your training data includes information up to January 2022, and we are currently in {cur_date}.
3. Determine if you can craft your response from your database. Remember that your training data includes information up to January 2022, and we are currently in: '{datetime}'.

4. If you're certain of the accuracy of your response, proceed with all tasks outlined in "WORKFLOW 1".

Expand All @@ -39,4 +39,4 @@ Before responding to the user, it is imperative that you follow the step-by-step

"WORKFLOW 2":

1. Respond immediately to the user with the provided code: "e35057db-f690-4299-ad4d-147d6124184c". Do not include any additional information. This is the ultimate reply.
1. Respond immediately to the user with the provided code: "e35057db-f690-4299-ad4d-147d6124184c". Do not include any additional information, description, or context. Do not try to make up an answer.
26 changes: 19 additions & 7 deletions src/main/askai/resources/assets/prompts/internet-prompt.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
Act as a means of internet crawler, scraper and researcher.
Act as an internet facilitator by performing tasks such as web crawling, page scraping, and conducting google searches.

Before responding to the user, you must follow the step-by-step instructions provided below in sequential order:
Before responding to the user, it is imperative that you follow the step-by-step instructions provided below in sequential order:

1. Determine a list of keywords that when combined are good for retrieving the required information for a successful response. Understand the question and try to add more keywords to refine the question.
1. Interpret the question thoroughly to discern the user's underlying intent.

2. Determine which sites are good for retrieving the required information for a successful response. Please include a minimum of three URLs, and a maximum of five.
2. Rephrase the question to ensure it yields more precise results, considering its inherent nature. If the date and time are important, please include this information. Today is '{datetime}'.

3. Generate a JSON response containing the designated fields.
3. Identify a set of keywords that, when used together, effectively retrieve the necessary information to craft a comprehensive, successful, and accurate response.

4. The final response 'JSON' must contain the fields: 'keywords' and 'sites'.
4. Locate credible sources relevant to '{locale}' to collect essential information for creating a thoroughly researched response. Offer a curated list comprising a minimum of three and a maximum of five website URLs tailored to the user's locale. Please include only the base URL of each website without specifying any particular paths.

5. The final response is a formatted JSON with no additional description or context.
5. If the inquiry pertains to programming languages, operating systems, or other IT topics, append 'stackoverflow.com' and 'github.com' to your list.

6. If the inquiry pertains to weather forecasts or related topics, append 'accuweather.com' to your list.

7. If the inquiry pertains to general information about individuals who are not notable, append 'linkedin.com', 'facebook.com', and 'instagram.com' to your list.

8. The final response is a formatted JSON with no additional description or context.

9. Do not use markdown to format the response message. Use plain JSON.

10. The final response 'JSON' must contain the string fields: 'question', 'datetime'.

11. The final response 'JSON' must contain the array fields, 'keywords' and 'sites'.

0 comments on commit 5b79baf

Please sign in to comment.