From 7153aaa998678032badf07af26ea1ed2ff8a6c91 Mon Sep 17 00:00:00 2001 From: Hugo Saporetti Junior Date: Sat, 28 Sep 2024 01:48:35 -0300 Subject: [PATCH] Improve internet search, evaluations (added white) and fixed black --- src/main/askai/core/component/geo_location.py | 2 +- .../askai/core/component/internet_service.py | 110 ++++++++++-------- src/main/askai/core/enums/acc_color.py | 22 ++-- src/main/askai/core/enums/router_mode.py | 2 +- .../core/features/processors/task_splitter.py | 2 +- .../askai/core/features/router/evaluation.py | 5 +- src/main/askai/core/model/acc_response.py | 4 + src/main/askai/core/support/text_formatter.py | 10 +- .../askai/resources/prompts/evaluation.txt | 37 ++++-- .../askai/resources/prompts/refine-search.txt | 6 +- .../resources/prompts/search-builder.txt | 17 +-- src/main/askai/resources/rag/accuracy.csv | 1 + 12 files changed, 127 insertions(+), 91 deletions(-) diff --git a/src/main/askai/core/component/geo_location.py b/src/main/askai/core/component/geo_location.py index 00c1fbad..9a1fbb14 100644 --- a/src/main/askai/core/component/geo_location.py +++ b/src/main/askai/core/component/geo_location.py @@ -110,7 +110,7 @@ def timezone(self) -> str: @property def location(self) -> str: - return f"{self.city}, {self.region_name} {self.country}" + return f"%CYAN%{self.city}, {self.region_name}, {self.country}%NC%" @property def datetime(self) -> str: diff --git a/src/main/askai/core/component/internet_service.py b/src/main/askai/core/component/internet_service.py index e639d7d1..dd3ff967 100644 --- a/src/main/askai/core/component/internet_service.py +++ b/src/main/askai/core/component/internet_service.py @@ -19,6 +19,7 @@ from typing import List import bs4 +import openai from askai.__classpath__ import API_KEYS from askai.core.askai_configs import configs from askai.core.askai_events import events @@ -60,16 +61,17 @@ class InternetService(metaclass=Singleton): CATEGORY_ICONS = { "Weather": "", "Sports": "醴", - "News": "", + "News": 
"", "Celebrities": "", "People": "", "Programming": "", "Travel": "", "General": "", + "Maps": "", } SITE_ICONS = defaultdict(str, { - "linkedin.com": "", + "linkedin.com": "", "github.com": "", "instagram.com": "", "x.com": "X", @@ -100,15 +102,16 @@ def wrap_response(cls, terms: str, output: str, result: SearchResult) -> str: :return: A formatted string that encapsulates the search response. """ terms: str = re.sub(r"\s{2,}", " ", terms) + sites: set[str] = set(re.findall(r"site:(.+?\..+?)\s+", terms) + result.sites) sources: str = " ".join( - filter(len, sorted(set(f"{s.replace(s, cls.SITE_ICONS[s]):<2}" or s for s in result.sites), key=len)) + filter(len, set(sorted([f"{s.replace(s, cls.SITE_ICONS[s]):<2}".strip() or s for s in sites], key=len))) ) # fmt: off return ( - f"Your {result.engine.title()} search returned the following:\n\n" - f"{output}\n\n---\n\n" - f"`{cls.CATEGORY_ICONS[result.category]} {result.category}` Sources: {sources} " - f"*Accessed: {geo_location.location} {now('%d %B, %Y')}*\n\n" + f"Your {result.engine.title()} search has returned the following results:" + f"\n\n{output}\n\n---\n\n" + f"`{cls.CATEGORY_ICONS[result.category]:<2} {result.category}` **Sources:** {sources} " + f"**Access:** {geo_location.location} - *{now('%B %d, %Y')}*\n\n" f">  Terms: {terms}") # fmt: on @@ -119,27 +122,30 @@ def _build_google_query(search: SearchResult) -> str: :return: A string representing the constructed Google Search query. """ # The order of conditions is important here, as the execution may stop early if a condition is met. 
- final_query = "" + google_query = "" match search.category.casefold(): case "people": - if any((f.find("people:") >= 0 for f in search.filters)): - final_query = f' intext:"{next((f for f in search.filters if f.startswith("people:")), None)}"' - final_query += " AND description AND information AND background" + if any((f.find("intext:") >= 0 for f in search.filters)): + google_query = ( + "description AND background AND work AND achievements " + f'{next((f for f in search.filters if f.startswith("intext:")), None)}' + ) case "weather": if any((f.find("weather:") >= 0 for f in search.filters)): - final_query = f' weather:"{next((f for f in search.filters if f.startswith("weather:")), None)}"' - final_query += " AND forecast" - # Gather the sites to be used in the search. - sites = f"{' OR '.join(set('site:' + url for url in search.sites))}" - # Make the final search query use the provided keywords. - if search.keywords: - final_query = f"{' '.join(set(search.keywords))} {sites} {final_query}" + google_query = ( + f'{now("%B %d %Y")} {next((f for f in search.filters if f.startswith("weather:")), None)}' + ) + case _: + if search.keywords: + # Gather the sites to be used in the search. 
+ sites = f"{' OR '.join(set('site:' + url for url in search.sites))}" + google_query = f"{' '.join(set(sorted(search.keywords)))} {sites}" - return final_query + return google_query def __init__(self): API_KEYS.ensure("GOOGLE_API_KEY", "google_search") - self._google = GoogleSearchAPIWrapper(google_api_key=API_KEYS.GOOGLE_API_KEY) + self._google = GoogleSearchAPIWrapper(k=10, google_api_key=API_KEYS.GOOGLE_API_KEY) self._tool = Tool(name="google_search", description="Search Google for recent results.", func=self._google.run) self._text_splitter = RecursiveCharacterTextSplitter( chunk_size=configs.chunk_size, chunk_overlap=configs.chunk_overlap @@ -159,21 +165,50 @@ def google_search(self, search: SearchResult) -> str: events.reply.emit(reply=AIReply.info(msg.searching())) search.sites = search.sites or ["google.com", "bing.com", "duckduckgo.com", "ask.com"] terms: str = self._build_google_query(search).strip() + question: str = re.sub(r"(\w+:)*|((\w+\.\w+)*)", "", terms, flags=re.DOTALL | re.MULTILINE) try: log.info("Searching Google for '%s'", terms) events.reply.emit(reply=AIReply.debug(msg.final_query(terms))) - ctx = str(self._tool.run(terms)) - llm_prompt = ChatPromptTemplate.from_messages([("system", "{query}\n\n{context}")]) - context: List[Document] = [Document(ctx)] + results: list[str] = str(self._tool.run(terms, verbose=configs.is_debug)).split(" ") + llm_prompt = ChatPromptTemplate.from_messages( + [ + ("system", "Use the following context to answer the question at the end:\\n\\n{context}"), + ("human", "{question}"), + ] + ) + docs: List[Document] = [Document(d) for d in results] chain = create_stuff_documents_chain( - lc_llm.create_chat_model(temperature=Temperature.CREATIVE_WRITING.temp), llm_prompt + lc_llm.create_chat_model(temperature=Temperature.COLDEST.temp), llm_prompt ) - output = chain.invoke({"query": search.question, "context": context}) - except HttpError as err: + output = chain.invoke({"question": question, "context": docs}) + except 
(HttpError, openai.APIError) as err: return msg.fail_to_search(str(err)) return self.refine_search(terms, output, search) + def refine_search(self, terms: str, response: str, search: SearchResult) -> str: + """Refine the text retrieved by the search engine. + :param terms: The search terms used in the search. + :param response: The internet search response. + :param search: The search result object. + :return: A refined version of the search result text, tailored to better answer the user's question. + """ + refine_prompt = PromptTemplate.from_template(self.refine_template).format( + idiom=shared.idiom, + sources=search.sites, + location=geo_location.location, + datetime=geo_location.datetime, + result=response, + question=search.question, + ) + log.info("STT::[QUESTION] '%s'", response) + llm = lc_llm.create_chat_model(temperature=Temperature.CREATIVE_WRITING.temp) + + if (response := llm.invoke(refine_prompt)) and (output := response.content): + return self.wrap_response(terms, output, search) + + return msg.no_good_result() + def scrap_sites(self, search: SearchResult) -> str: """Scrape a web page and summarize its contents. :param search: The AI search parameters encapsulated in a SearchResult object. @@ -210,28 +245,5 @@ def _format_docs(docs) -> str: return self.refine_search(search.question, str(output), search) return msg.no_output("search") - def refine_search(self, terms: str, response: str, search: SearchResult) -> str: - """Refine the text retrieved by the search engine. - :param terms: The search terms used in the search. - :param response: The internet search response. - :param search: The search result object. - :return: A refined version of the search result text, tailored to better answer the user's question. 
- """ - refine_prompt = PromptTemplate.from_template(self.refine_template).format( - idiom=shared.idiom, - sources=search.sites, - location=geo_location.location, - datetime=geo_location.datetime, - result=response, - question=search.question, - ) - log.info("STT::[QUESTION] '%s'", response) - llm = lc_llm.create_chat_model(temperature=Temperature.CREATIVE_WRITING.temp) - - if (response := llm.invoke(refine_prompt)) and (output := response.content): - return self.wrap_response(terms, output, search) - - return msg.no_good_result() - assert (internet := InternetService().INSTANCE) is not None diff --git a/src/main/askai/core/enums/acc_color.py b/src/main/askai/core/enums/acc_color.py index cf730a36..1b5e3aea 100644 --- a/src/main/askai/core/enums/acc_color.py +++ b/src/main/askai/core/enums/acc_color.py @@ -26,13 +26,13 @@ class AccColor(Enumeration): INTERRUPT = 'Black', -1 - EXCELLENT = 'Blue', 0 + TERMINATE = 'White', 0 - GOOD = 'Green', 1 + EXCELLENT = 'Blue', 1 - MODERATE = 'Yellow', 2 + GOOD = 'Green', 2 - INCOMPLETE = 'Orange', 3 + MODERATE = 'Yellow', 3 BAD = 'Red', 4 @@ -80,9 +80,17 @@ def val(self) -> int: """ return int(self.value[1]) + @property + def is_terminate(self) -> bool: + return self == self.TERMINATE + + @property + def is_interrupt(self) -> bool: + return self == self.INTERRUPT + @property def is_bad(self) -> bool: - return self in [self.BAD, self.INCOMPLETE] + return self == self.BAD @property def is_moderate(self) -> bool: @@ -92,10 +100,6 @@ def is_moderate(self) -> bool: def is_good(self) -> bool: return self in [self.GOOD, self.EXCELLENT] - @property - def is_interrupt(self) -> bool: - return self == self.INTERRUPT - def passed(self, threshold: "AccColor") -> bool: """Determine whether the response matches a 'PASS' classification. :param threshold: The threshold or criteria used to determine a 'PASS' classification. 
diff --git a/src/main/askai/core/enums/router_mode.py b/src/main/askai/core/enums/router_mode.py index a797c628..977b118c 100644 --- a/src/main/askai/core/enums/router_mode.py +++ b/src/main/askai/core/enums/router_mode.py @@ -44,7 +44,7 @@ class RouterMode(Enumeration): RAG = "Retrieval-Augmented-Generation", "", rag - CHAT = "Taius Chat", "", chat + CHAT = "Taius Chat", "", chat # fmt: on diff --git a/src/main/askai/core/features/processors/task_splitter.py b/src/main/askai/core/features/processors/task_splitter.py index 93b1e687..5dcbbb95 100644 --- a/src/main/askai/core/features/processors/task_splitter.py +++ b/src/main/askai/core/features/processors/task_splitter.py @@ -186,7 +186,7 @@ def _splitter_wrapper_() -> Optional[str]: try: agent_output = self._process_tasks(task_list) acc_response: AccResponse = assert_accuracy(question, agent_output, AccColor.MODERATE) - except (InterruptionRequest, TerminatingQuery) as err: + except InterruptionRequest as err: return str(err) except self.RETRIABLE_ERRORS: events.reply.emit(reply=AIReply.error(msg.sorry_retry())) diff --git a/src/main/askai/core/features/router/evaluation.py b/src/main/askai/core/features/router/evaluation.py index be0ff37d..a0d3fd56 100644 --- a/src/main/askai/core/features/router/evaluation.py +++ b/src/main/askai/core/features/router/evaluation.py @@ -23,7 +23,7 @@ from askai.core.support.langchain_support import lc_llm from askai.core.support.rag_provider import RAGProvider from askai.core.support.shared_instances import shared -from askai.exception.exceptions import InaccurateResponse, InterruptionRequest +from askai.exception.exceptions import InaccurateResponse, InterruptionRequest, TerminatingQuery from langchain_core.messages import AIMessage from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate from langchain_core.runnables.history import RunnableWithMessageHistory @@ -71,6 +71,9 @@ def assert_accuracy(question: str, ai_response: str, 
pass_threshold: AccColor = # AI flags that it can't continue interacting. log.warning(msg.interruption_requested(output)) raise InterruptionRequest(ai_response) + elif acc.is_terminate: + # AI flags that the user wants to end the session. + raise TerminatingQuery(ai_response) elif not acc.is_pass(pass_threshold): # Include the guidelines for the first mistake. if not shared.context.get("EVALUATION"): diff --git a/src/main/askai/core/model/acc_response.py b/src/main/askai/core/model/acc_response.py index 6c787b7c..13cf75ce 100644 --- a/src/main/askai/core/model/acc_response.py +++ b/src/main/askai/core/model/acc_response.py @@ -76,6 +76,10 @@ def details(self) -> str: def is_interrupt(self) -> bool: return self.acc_color.is_interrupt + @property + def is_terminate(self) -> bool: + return self.acc_color.is_terminate + def is_pass(self, threshold: AccColor) -> bool: """Determine whether the response matches a 'PASS' classification. :param threshold: The threshold or criteria used to determine a 'PASS' classification. diff --git a/src/main/askai/core/support/text_formatter.py b/src/main/askai/core/support/text_formatter.py index 7451c3fc..e3621aec 100644 --- a/src/main/askai/core/support/text_formatter.py +++ b/src/main/askai/core/support/text_formatter.py @@ -40,7 +40,7 @@ class TextFormatter(metaclass=Singleton): RE_MD_CODE_BLOCK = r"(```.+```)" CHAT_ICONS = { - "": " Oops! %NC%", + "": "%RED% Oops! 
%NC%", "": "\n>  *Tip:* ", "": "\n>  *Analysis:* ", "": "\n>  *Summary:* ", @@ -53,7 +53,7 @@ class TextFormatter(metaclass=Singleton): RE_TYPES = { "MD": RE_MD_CODE_BLOCK, "": RE_URL, - "": r"[\s*_]*Errors?[_*-:\s]+", + "": r"[\s*_]*Errors?[_*-:\s]+", "": r"[\s*_]*Hints?( ([Aa]nd|&) [Tt]ips?)?[_*-:\s]+", "": r"[\s*_]*Analysis[_*-:\s]+", "": r"[\s*_]*Summary[_*-:\s]+", @@ -110,7 +110,7 @@ def beautify(self, text: Any) -> str: # fmt: off text = dedent(str(text)).strip() - text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text) + text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text) text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text) text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text) text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text) @@ -121,8 +121,8 @@ def beautify(self, text: Any) -> str: text = re.sub(self.RE_TYPES[''], r" [\1](\1)", text) text = re.sub(self.RE_TYPES['MD'], r"\n\1\n", text) text = re.sub(r'```(.+)```\s+', r"\n```\1```\n", text) - text = text.replace(os.getenv("USER", "user"), f'`{os.getenv("USER", "user")}`') - text = text.replace("Taius", f'**Taius**') + text = re.sub(rf"\s+{os.getenv('USER', 'user')}", f'` {os.getenv("USER", "user")}`', text) + text = re.sub(r"\s+Taius", f' **Taius**', text) # fmt: on diff --git a/src/main/askai/resources/prompts/evaluation.txt b/src/main/askai/resources/prompts/evaluation.txt index 5e138012..16e8846d 100644 --- a/src/main/askai/resources/prompts/evaluation.txt +++ b/src/main/askai/resources/prompts/evaluation.txt @@ -12,27 +12,38 @@ Avoid using markdown or any special formatting in your response. Use the following criteria for classification: -1. **Blue**: Level of accuracy [100%-95%]. Reasoning: The AI response is perfect responding to the question posed, including a detailed and accurate information. +1. **Black:** Level of accuracy [0%-100%]. Reasoning: The AI responds that it cannot continue with further interactions due to a specified reason. -1. 
**Green**: Level of accuracy [94%-70%]. Reasoning: The AI response successfully addresses the question posed, indicating a full understanding and appropriate analysis. +2. **White:** Level of accuracy [0%-100%]. Reasoning: Only used when the user has clearly requested to end the session; this is the code that will cause the application to exit. -2. **Yellow**: Level of accuracy [69%-50%]. Reasoning: The AI response partially addresses the question but lacks full depth or detail, suggesting moderate understanding. +3. **Blue:** Level of accuracy [100%-95%]. Reasoning: The AI response perfectly answers the question posed, including detailed and accurate information. -3. **Orange**: Level of accuracy [49%-30%]. Reasoning: The AI response is incomplete or if you have low confidence of the classification. +4. **Green:** Level of accuracy [94%-70%]. Reasoning: The AI response successfully addresses the question posed, indicating a full understanding and appropriate analysis. -4. **Red**: Level of accuracy [29%-0%]. Reasoning: The AI response fails to adequately address the question, indicating a misunderstanding or incorrect analysis. +5. **Yellow:** Level of accuracy [69%-50%]. Reasoning: The AI response partially addresses the question but lacks full depth or detail, suggesting moderate understanding. -5. **Black**: Level of accuracy [Any]. Reasoning: The AI responds that it cannot continue with further interactions due to a specified reason. +6. **Red:** Level of accuracy [49%-0%]. Reasoning: The AI response fails to adequately address the question, indicating a misunderstanding or incorrect analysis. -**When evaluating responses, classify 'Black' (Interrupt Responses) when:** +**When evaluating responses, classify 'Black' when:** -- The response explains the lack of information, context, or when the AI is clearly having trouble understanding the user input.
+- If the response indicates a lack of information or context, ask clarifying questions to gather more details. -- The response is a negative answer to the question. +- When the AI struggles to understand user input, guide the conversation by requesting clarification, examples, or rephrasing of the question. +- If more context is needed, ask the user to expand or provide specific information to ensure an accurate response. -**When evaluating responses, classify 'Green' or 'Blue' (Known Good/Excellent Responses) when:** +- When the AI responds "negatively" to the question. + +- When providing search results or insights, focus on neutral and helpful responses rather than stating that no information is available. + + +**When evaluating responses, classify 'White' when:** + +- The user intends to terminate the session. + + +**When evaluating responses, classify 'Green' or 'Blue' when:** - 'Blue' if it detects a successful command execution, e.g., "OK, command succeeded". The "command output" can be disregarded for this classification. @@ -43,7 +54,7 @@ Use the following criteria for classification: - Regardless of the question, if the response includes the phrase: "Your search returned the following:". -**When evaluating responses, classify 'Red' (Known Bad Responses) when:** +**When evaluating responses, classify 'Red' when:** - The response fails to resolve the primary goal; 'Orange' if the response fails to resolve any of the sub goals. @@ -59,8 +70,12 @@ as 'Red' if it does not align with known facts. - The AI is seeking user assistance. +<> If a classification has already been determined (i.e., if any of the above criteria have been met), return immediately to the user. 
+ + **Classification Guidelines (rate from 0% to 100%):** + - Assess the AI's response for correctness by considering its ability to effectively address and correct syntax errors or misinterpretations in the user's input, rather than focusing solely on literal repetitions or minor discrepancies in terminology. - Revise the classifications for responses from the AI that contain irrelevant information to 'Yellow' instead of 'Red', as any additional information is still valued. diff --git a/src/main/askai/resources/prompts/refine-search.txt b/src/main/askai/resources/prompts/refine-search.txt index 8d64db64..607d80fb 100644 --- a/src/main/askai/resources/prompts/refine-search.txt +++ b/src/main/askai/resources/prompts/refine-search.txt @@ -21,7 +21,7 @@ Refine the existing response by adding more relevant details and ensuring the ex - Enhance the response using Markdown to format single-line code blocks for brief code snippets and multi-line code blocks for extensive code sections. Emphasize key elements or important stuff in **bold** and names in *italic*. When the response is already a markdown formatted text, just ensure everything is neat. 4. **Leave it Untouched:** - - If no improvements are possible, return the result as is without any extraneous explanation or comments. + - If no improvements are possible, return the result as is without any extraneous explanation or comments. <> @@ -32,7 +32,9 @@ Refine the existing response by adding more relevant details and ensuring the ex - Avoid expressing helpfulness or offering assistance. -Internet Search Result: "{result}" +Internet Search Result: + +{result} User Question: "{question}" diff --git a/src/main/askai/resources/prompts/search-builder.txt b/src/main/askai/resources/prompts/search-builder.txt index c1b0d857..ad4da562 100644 --- a/src/main/askai/resources/prompts/search-builder.txt +++ b/src/main/askai/resources/prompts/search-builder.txt @@ -9,7 +9,8 @@ Given the following icon/categories: 5. People 6. 
Programming 7. Travel -8. General +8. Maps +9. General Select **one** category that most fits the request from the provided options. @@ -27,21 +28,15 @@ Your task is to respond to a user query following the steps below. You MUST foll 3. **Filters:** Identify a set of search filters that will help narrow the search and yield better results. -4. **Source Selection:** Retrieve at least four credible sources relevant to the question. These sources should be tailored to the user's location {location} and locale '{idiom}'. +4. **Source Selection:** Retrieve credible sources relevant to the question. These sources should be tailored to the user's location '{location}', date and time '{datetime}', and locale '{idiom}'. -5. **Personal Inquiries:** For inquiries related to non notorious individuals, extract the name of the referenced person and add the filter: 'people:' to your list. Default to the following websites if none are mentioned: "github.com", "linkedin.com", "facebook.com", "instagram.com", "tiktok.com", "x.com". +5. **Personal Inquiries:** For inquiries related to non notorious individuals, **EXTRACT THE EXACT NAME** of the referenced person **WITHOUT MODIFYING** IT and add the filter: intext:"<person name>" to your list. Default to the following websites if none are mentioned: "github.com", "linkedin.com", "facebook.com", "instagram.com", "tiktok.com", "x.com". 6. **Technical Inquiries:** If the query relates to programming languages, operating systems, or IT, default to using the following websites: "stackoverflow.com", "github.com". -7. **Weather Inquiries:** For weather-related inquiries, add the filter: 'weather:' to your list. Default to websites such as 'weather.com', 'accuweather.com', 'weather.gov'. +7. **Weather Inquiries:** For weather-related inquiries, add the filter: 'weather:"<location>"' to your list. Default to websites such as 'weather.com', 'accuweather.com', 'weather.gov'.
-###Example### - -Question: "What are the best practices for securing a Python-based web application?" - -1. **Analyze**: The user wants security advice for Python web development. -2. **Keywords**: 'Python web security best practices', 'securing Python web apps'. -3. **Sources**: Find four credible sources, including stackoverflow.com and github.com, discussing best practices for web application security. +8. **Map Inquiries:** For map-related inquiries, add the filter: 'map:"<location>"' to your list. The response should follow this format: diff --git a/src/main/askai/resources/rag/accuracy.csv b/src/main/askai/resources/rag/accuracy.csv index a90cae4a..a9863ff7 100644 --- a/src/main/askai/resources/rag/accuracy.csv +++ b/src/main/askai/resources/rag/accuracy.csv @@ -21,3 +21,4 @@ List my music and let me know if there is any gabiroba song. If so, show me the Display the file name of the gabiroba song;The Gabiroba song file name is 'have-a-nice-day.mp3';Red;This is a clear hallucination because the filename does not conform to the required file pattern. What is my second task;The first task or action to be taken according to the reminder is to 'Thank Rafael Romão for the keyboard' at 17:20 on 04/11/2024.;Red;The AI partially addresses the user's question by providing information about the first task but lacks a direct response to the query about the second task. Who is Garirubal?;Your Google search returned the following:\n\nIt appears that there are currently no significant search results;Yellow;Internet searches can yield negative results or a lack of information, therefore, there is no definitive right or wrong. +Bye bye;The user requested to end the session.;White;The user requested to terminate the session, so, white is the right classification.