Skip to content

Commit

Permalink
Improve internet search, evaluations (added white) and fixed black
Browse files Browse the repository at this point in the history
  • Loading branch information
yorevs committed Sep 28, 2024
1 parent 0c644be commit 7153aaa
Show file tree
Hide file tree
Showing 12 changed files with 127 additions and 91 deletions.
2 changes: 1 addition & 1 deletion src/main/askai/core/component/geo_location.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def timezone(self) -> str:

@property
def location(self) -> str:
return f"{self.city}, {self.region_name} {self.country}"
return f"%CYAN%{self.city}, {self.region_name}, {self.country}%NC%"

@property
def datetime(self) -> str:
Expand Down
110 changes: 61 additions & 49 deletions src/main/askai/core/component/internet_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from typing import List

import bs4
import openai
from askai.__classpath__ import API_KEYS
from askai.core.askai_configs import configs
from askai.core.askai_events import events
Expand Down Expand Up @@ -60,16 +61,17 @@ class InternetService(metaclass=Singleton):
CATEGORY_ICONS = {
"Weather": "",
"Sports": "醴",
"News": "",
"News": "",
"Celebrities": "",
"People": "",
"Programming": "",
"Travel": "",
"General": "",
"Maps": "",
}

SITE_ICONS = defaultdict(str, {
"linkedin.com": "",
"linkedin.com": "",
"github.com": "",
"instagram.com": "",
"x.com": "X",
Expand Down Expand Up @@ -100,15 +102,16 @@ def wrap_response(cls, terms: str, output: str, result: SearchResult) -> str:
:return: A formatted string that encapsulates the search response.
"""
terms: str = re.sub(r"\s{2,}", " ", terms)
sites: set[str] = set(re.findall(r"site:(.+?\..+?)\s+", terms) + result.sites)
sources: str = " ".join(
filter(len, sorted(set(f"{s.replace(s, cls.SITE_ICONS[s]):<2}" or s for s in result.sites), key=len))
filter(len, set(sorted([f"{s.replace(s, cls.SITE_ICONS[s]):<2}".strip() or s for s in sites], key=len)))
)
# fmt: off
return (
f"Your {result.engine.title()} search returned the following:\n\n"
f"{output}\n\n---\n\n"
f"`{cls.CATEGORY_ICONS[result.category]} {result.category}` Sources: {sources} "
f"*Accessed: {geo_location.location} {now('%d %B, %Y')}*\n\n"
f"Your {result.engine.title()} search has returned the following results:"
f"\n\n{output}\n\n---\n\n"
f"`{cls.CATEGORY_ICONS[result.category]:<2} {result.category}` **Sources:** {sources} "
f"**Access:** {geo_location.location} - *{now('%B %d, %Y')}*\n\n"
f">  Terms: {terms}")
# fmt: on

Expand All @@ -119,27 +122,30 @@ def _build_google_query(search: SearchResult) -> str:
:return: A string representing the constructed Google Search query.
"""
# The order of conditions is important here, as the execution may stop early if a condition is met.
final_query = ""
google_query = ""
match search.category.casefold():
case "people":
if any((f.find("people:") >= 0 for f in search.filters)):
final_query = f' intext:"{next((f for f in search.filters if f.startswith("people:")), None)}"'
final_query += " AND description AND information AND background"
if any((f.find("intext:") >= 0 for f in search.filters)):
google_query = (
"description AND background AND work AND achievements "
f'{next((f for f in search.filters if f.startswith("intext:")), None)}'
)
case "weather":
if any((f.find("weather:") >= 0 for f in search.filters)):
final_query = f' weather:"{next((f for f in search.filters if f.startswith("weather:")), None)}"'
final_query += " AND forecast"
# Gather the sites to be used in the search.
sites = f"{' OR '.join(set('site:' + url for url in search.sites))}"
# Make the final search query use the provided keywords.
if search.keywords:
final_query = f"{' '.join(set(search.keywords))} {sites} {final_query}"
google_query = (
f'{now("%B %d %Y")} {next((f for f in search.filters if f.startswith("weather:")), None)}'
)
case _:
if search.keywords:
# Gather the sites to be used in the search.
sites = f"{' OR '.join(set('site:' + url for url in search.sites))}"
google_query = f"{' '.join(set(sorted(search.keywords)))} {sites}"

return final_query
return google_query

def __init__(self):
API_KEYS.ensure("GOOGLE_API_KEY", "google_search")
self._google = GoogleSearchAPIWrapper(google_api_key=API_KEYS.GOOGLE_API_KEY)
self._google = GoogleSearchAPIWrapper(k=10, google_api_key=API_KEYS.GOOGLE_API_KEY)
self._tool = Tool(name="google_search", description="Search Google for recent results.", func=self._google.run)
self._text_splitter = RecursiveCharacterTextSplitter(
chunk_size=configs.chunk_size, chunk_overlap=configs.chunk_overlap
Expand All @@ -159,21 +165,50 @@ def google_search(self, search: SearchResult) -> str:
events.reply.emit(reply=AIReply.info(msg.searching()))
search.sites = search.sites or ["google.com", "bing.com", "duckduckgo.com", "ask.com"]
terms: str = self._build_google_query(search).strip()
question: str = re.sub(r"(\w+:)*|((\w+\.\w+)*)", "", terms, flags=re.DOTALL | re.MULTILINE)
try:
log.info("Searching Google for '%s'", terms)
events.reply.emit(reply=AIReply.debug(msg.final_query(terms)))
ctx = str(self._tool.run(terms))
llm_prompt = ChatPromptTemplate.from_messages([("system", "{query}\n\n{context}")])
context: List[Document] = [Document(ctx)]
results: list[str] = str(self._tool.run(terms, verbose=configs.is_debug)).split(" ")
llm_prompt = ChatPromptTemplate.from_messages(
[
("system", "Use the following context to answer the question at the end:\\n\\n{context}"),
("human", "{question}"),
]
)
docs: List[Document] = [Document(d) for d in results]
chain = create_stuff_documents_chain(
lc_llm.create_chat_model(temperature=Temperature.CREATIVE_WRITING.temp), llm_prompt
lc_llm.create_chat_model(temperature=Temperature.COLDEST.temp), llm_prompt
)
output = chain.invoke({"query": search.question, "context": context})
except HttpError as err:
output = chain.invoke({"question": question, "context": docs})
except (HttpError, openai.APIError) as err:
return msg.fail_to_search(str(err))

return self.refine_search(terms, output, search)

def refine_search(self, terms: str, response: str, search: SearchResult) -> str:
"""Refine the text retrieved by the search engine.
:param terms: The search terms used in the search.
:param response: The internet search response.
:param search: The search result object.
:return: A refined version of the search result text, tailored to better answer the user's question.
"""
refine_prompt = PromptTemplate.from_template(self.refine_template).format(
idiom=shared.idiom,
sources=search.sites,
location=geo_location.location,
datetime=geo_location.datetime,
result=response,
question=search.question,
)
log.info("STT::[QUESTION] '%s'", response)
llm = lc_llm.create_chat_model(temperature=Temperature.CREATIVE_WRITING.temp)

if (response := llm.invoke(refine_prompt)) and (output := response.content):
return self.wrap_response(terms, output, search)

return msg.no_good_result()

def scrap_sites(self, search: SearchResult) -> str:
"""Scrape a web page and summarize its contents.
:param search: The AI search parameters encapsulated in a SearchResult object.
Expand Down Expand Up @@ -210,28 +245,5 @@ def _format_docs(docs) -> str:
return self.refine_search(search.question, str(output), search)
return msg.no_output("search")

def refine_search(self, terms: str, response: str, search: SearchResult) -> str:
"""Refine the text retrieved by the search engine.
:param terms: The search terms used in the search.
:param response: The internet search response.
:param search: The search result object.
:return: A refined version of the search result text, tailored to better answer the user's question.
"""
refine_prompt = PromptTemplate.from_template(self.refine_template).format(
idiom=shared.idiom,
sources=search.sites,
location=geo_location.location,
datetime=geo_location.datetime,
result=response,
question=search.question,
)
log.info("STT::[QUESTION] '%s'", response)
llm = lc_llm.create_chat_model(temperature=Temperature.CREATIVE_WRITING.temp)

if (response := llm.invoke(refine_prompt)) and (output := response.content):
return self.wrap_response(terms, output, search)

return msg.no_good_result()


assert (internet := InternetService().INSTANCE) is not None
22 changes: 13 additions & 9 deletions src/main/askai/core/enums/acc_color.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ class AccColor(Enumeration):

INTERRUPT = 'Black', -1

EXCELLENT = 'Blue', 0
TERMINATE = 'White', 0

GOOD = 'Green', 1
EXCELLENT = 'Blue', 1

MODERATE = 'Yellow', 2
GOOD = 'Green', 2

INCOMPLETE = 'Orange', 3
MODERATE = 'Yellow', 3

BAD = 'Red', 4

Expand Down Expand Up @@ -80,9 +80,17 @@ def val(self) -> int:
"""
return int(self.value[1])

@property
def is_terminate(self) -> bool:
return self == self.TERMINATE

@property
def is_interrupt(self) -> bool:
return self == self.INTERRUPT

@property
def is_bad(self) -> bool:
return self in [self.BAD, self.INCOMPLETE]
return self == self.BAD

@property
def is_moderate(self) -> bool:
Expand All @@ -92,10 +100,6 @@ def is_moderate(self) -> bool:
def is_good(self) -> bool:
return self in [self.GOOD, self.EXCELLENT]

@property
def is_interrupt(self) -> bool:
return self == self.INTERRUPT

def passed(self, threshold: "AccColor") -> bool:
"""Determine whether the response matches a 'PASS' classification.
:param threshold: The threshold or criteria used to determine a 'PASS' classification.
Expand Down
2 changes: 1 addition & 1 deletion src/main/askai/core/enums/router_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class RouterMode(Enumeration):

RAG = "Retrieval-Augmented-Generation", "", rag

CHAT = "Taius Chat", "", chat
CHAT = "Taius Chat", "", chat

# fmt: on

Expand Down
2 changes: 1 addition & 1 deletion src/main/askai/core/features/processors/task_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def _splitter_wrapper_() -> Optional[str]:
try:
agent_output = self._process_tasks(task_list)
acc_response: AccResponse = assert_accuracy(question, agent_output, AccColor.MODERATE)
except (InterruptionRequest, TerminatingQuery) as err:
except InterruptionRequest as err:
return str(err)
except self.RETRIABLE_ERRORS:
events.reply.emit(reply=AIReply.error(msg.sorry_retry()))
Expand Down
5 changes: 4 additions & 1 deletion src/main/askai/core/features/router/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from askai.core.support.langchain_support import lc_llm
from askai.core.support.rag_provider import RAGProvider
from askai.core.support.shared_instances import shared
from askai.exception.exceptions import InaccurateResponse, InterruptionRequest
from askai.exception.exceptions import InaccurateResponse, InterruptionRequest, TerminatingQuery
from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
from langchain_core.runnables.history import RunnableWithMessageHistory
Expand Down Expand Up @@ -71,6 +71,9 @@ def assert_accuracy(question: str, ai_response: str, pass_threshold: AccColor =
# AI flags that it can't continue interacting.
log.warning(msg.interruption_requested(output))
raise InterruptionRequest(ai_response)
elif acc.is_terminate:
# AI flags that the user wants to end the session.
raise TerminatingQuery(ai_response)
elif not acc.is_pass(pass_threshold):
# Include the guidelines for the first mistake.
if not shared.context.get("EVALUATION"):
Expand Down
4 changes: 4 additions & 0 deletions src/main/askai/core/model/acc_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ def details(self) -> str:
def is_interrupt(self) -> bool:
return self.acc_color.is_interrupt

@property
def is_terminate(self) -> bool:
return self.acc_color.is_terminate

def is_pass(self, threshold: AccColor) -> bool:
"""Determine whether the response matches a 'PASS' classification.
:param threshold: The threshold or criteria used to determine a 'PASS' classification.
Expand Down
10 changes: 5 additions & 5 deletions src/main/askai/core/support/text_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class TextFormatter(metaclass=Singleton):
RE_MD_CODE_BLOCK = r"(```.+```)"

CHAT_ICONS = {
"": " Oops! %NC%",
"": "%RED% Oops! %NC%",
"": "\n>  *Tip:* ",
"": "\n>  *Analysis:* ",
"": "\n>  *Summary:* ",
Expand All @@ -53,7 +53,7 @@ class TextFormatter(metaclass=Singleton):
RE_TYPES = {
"MD": RE_MD_CODE_BLOCK,
"": RE_URL,
"": r"[\s*_]*Errors?[_*-:\s]+",
"": r"[\s*_]*Errors?[_*-:\s]+",
"": r"[\s*_]*Hints?( ([Aa]nd|&) [Tt]ips?)?[_*-:\s]+",
"": r"[\s*_]*Analysis[_*-:\s]+",
"": r"[\s*_]*Summary[_*-:\s]+",
Expand Down Expand Up @@ -110,7 +110,7 @@ def beautify(self, text: Any) -> str:
# fmt: off

text = dedent(str(text)).strip()
text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text)
text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text)
text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text)
text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text)
text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text)
Expand All @@ -121,8 +121,8 @@ def beautify(self, text: Any) -> str:
text = re.sub(self.RE_TYPES[''], r" [\1](\1)", text)
text = re.sub(self.RE_TYPES['MD'], r"\n\1\n", text)
text = re.sub(r'```(.+)```\s+', r"\n```\1```\n", text)
text = text.replace(os.getenv("USER", "user"), f'`{os.getenv("USER", "user")}`')
text = text.replace("Taius", f'**Taius**')
text = re.sub(rf"\s+{os.getenv('USER', 'user')}", f'` {os.getenv("USER", "user")}`', text)
text = re.sub(r"\s+Taius", f' **Taius**', text)

# fmt: on

Expand Down
37 changes: 26 additions & 11 deletions src/main/askai/resources/prompts/evaluation.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,38 @@ Avoid using markdown or any special formatting in your response.

Use the following criteria for classification:

1. **Blue**: Level of accuracy [100%-95%]. Reasoning: The AI response is perfect in responding to the question posed, including detailed and accurate information.
1. **Black:** Level of accuracy [0%-100%]. Reasoning: The AI responds that it cannot continue with further interactions due to a specified reason.

1. **Green**: Level of accuracy [94%-70%]. Reasoning: The AI response successfully addresses the question posed, indicating a full understanding and appropriate analysis.
2. **White:** Level of accuracy [0%-100%]. Reasoning: Only used when the user has clearly requested to end the session; this is the code that will cause the application to exit.

2. **Yellow**: Level of accuracy [69%-50%]. Reasoning: The AI response partially addresses the question but lacks full depth or detail, suggesting moderate understanding.
3. **Blue:** Level of accuracy [100%-95%]. Reasoning: The AI response is perfect in responding to the question posed, including detailed and accurate information.

3. **Orange**: Level of accuracy [49%-30%]. Reasoning: The AI response is incomplete or if you have low confidence of the classification.
4. **Green:** Level of accuracy [94%-70%]. Reasoning: The AI response successfully addresses the question posed, indicating a full understanding and appropriate analysis.

4. **Red**: Level of accuracy [29%-0%]. Reasoning: The AI response fails to adequately address the question, indicating a misunderstanding or incorrect analysis.
5. **Yellow:** Level of accuracy [69%-50%]. Reasoning: The AI response partially addresses the question but lacks full depth or detail, suggesting moderate understanding.

5. **Black**: Level of accuracy [Any]. Reasoning: The AI responds that it cannot continue with further interactions due to a specified reason.
6. **Red:** Level of accuracy [29%-0%]. Reasoning: The AI response fails to adequately address the question, indicating a misunderstanding or incorrect analysis.


**When evaluating responses, classify 'Black' (Interrupt Responses) when:**
**When evaluating responses, classify 'Black' when:**

- The response explains the lack of information, context, or when the AI is clearly having trouble understanding the user input.
- If the response indicates a lack of information or context, ask clarifying questions to gather more details.

- The response is a negative answer to the question.
- When the AI struggles to understand user input, guide the conversation by requesting clarification, examples, or rephrasing of the question.

- If more context is needed, ask the user to expand or provide specific information to ensure an accurate response.

**When evaluating responses, classify 'Green' or 'Blue' (Known Good/Excellent Responses) when:**
- When the AI responds "negatively" to the question.

- When providing search results or insights, focus on neutral and helpful responses rather than stating that no information is available.


**When evaluating responses, classify 'White' when:**

- The user intends to terminate the session.


**When evaluating responses, classify 'Green' or 'Blue' when:**

- 'Blue' if it detects a successful command execution, e.g., "OK, command <any command> succeeded". The "command output" can be disregarded for this classification.

Expand All @@ -43,7 +54,7 @@ Use the following criteria for classification:
- Regardless of the question, if the response includes the phrase: "search has returned the following results:".


**When evaluating responses, classify 'Red' (Known Bad Responses) when:**
**When evaluating responses, classify 'Red' when:**

- The response fails to resolve the primary goal; classify 'Yellow' if the response fails to resolve any of the sub-goals.

Expand All @@ -59,8 +70,12 @@ as 'Red' if it does not align with known facts.
- The AI is seeking user assistance.


<<ATTENTION>> If a classification has already been determined (i.e., if any of the above criteria have been met), return it to the user immediately.


**Classification Guidelines (rate from 0% to 100%):**


- Assess the AI's response for correctness by considering its ability to effectively address and correct syntax errors or misinterpretations in the user's input, rather than focusing solely on literal repetitions or minor discrepancies in terminology.

- Classify AI responses that contain irrelevant information as 'Yellow' instead of 'Red', since the additional information still has value.
Expand Down
Loading

0 comments on commit 7153aaa

Please sign in to comment.