Skip to content

Commit

Permalink
Improve internet search, evaluations (added white) and fixed black
Browse files Browse the repository at this point in the history
  • Loading branch information
yorevs committed Sep 28, 2024
1 parent 0c644be commit 7153aaa
Show file tree
Hide file tree
Showing 12 changed files with 127 additions and 91 deletions.
2 changes: 1 addition & 1 deletion src/main/askai/core/component/geo_location.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def timezone(self) -> str:

@property
def location(self) -> str:
return f"{self.city}, {self.region_name} {self.country}"
return f"%CYAN%{self.city}, {self.region_name}, {self.country}%NC%"

@property
def datetime(self) -> str:
Expand Down
110 changes: 61 additions & 49 deletions src/main/askai/core/component/internet_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from typing import List

import bs4
import openai
from askai.__classpath__ import API_KEYS
from askai.core.askai_configs import configs
from askai.core.askai_events import events
Expand Down Expand Up @@ -60,16 +61,17 @@ class InternetService(metaclass=Singleton):
CATEGORY_ICONS = {
"Weather": "",
"Sports": "醴",
"News": "",
"News": "",
"Celebrities": "",
"People": "",
"Programming": "",
"Travel": "",
"General": "",
"Maps": "",
}

SITE_ICONS = defaultdict(str, {
"linkedin.com": "",
"linkedin.com": "",
"github.com": "",
"instagram.com": "",
"x.com": "X",
Expand Down Expand Up @@ -100,15 +102,16 @@ def wrap_response(cls, terms: str, output: str, result: SearchResult) -> str:
:return: A formatted string that encapsulates the search response.
"""
terms: str = re.sub(r"\s{2,}", " ", terms)
sites: set[str] = set(re.findall(r"site:(.+?\..+?)\s+", terms) + result.sites)
sources: str = " ".join(
filter(len, sorted(set(f"{s.replace(s, cls.SITE_ICONS[s]):<2}" or s for s in result.sites), key=len))
filter(len, set(sorted([f"{s.replace(s, cls.SITE_ICONS[s]):<2}".strip() or s for s in sites], key=len)))
)
# fmt: off
return (
f"Your {result.engine.title()} search returned the following:\n\n"
f"{output}\n\n---\n\n"
f"`{cls.CATEGORY_ICONS[result.category]} {result.category}` Sources: {sources} "
f"*Accessed: {geo_location.location} {now('%d %B, %Y')}*\n\n"
f"Your {result.engine.title()} search has returned the following results:"
f"\n\n{output}\n\n---\n\n"
f"`{cls.CATEGORY_ICONS[result.category]:<2} {result.category}` **Sources:** {sources} "
f"**Access:** {geo_location.location} - *{now('%B %d, %Y')}*\n\n"
f">  Terms: {terms}")
# fmt: on

Expand All @@ -119,27 +122,30 @@ def _build_google_query(search: SearchResult) -> str:
:return: A string representing the constructed Google Search query.
"""
# The order of conditions is important here, as the execution may stop early if a condition is met.
final_query = ""
google_query = ""
match search.category.casefold():
case "people":
if any((f.find("people:") >= 0 for f in search.filters)):
final_query = f' intext:"{next((f for f in search.filters if f.startswith("people:")), None)}"'
final_query += " AND description AND information AND background"
if any((f.find("intext:") >= 0 for f in search.filters)):
google_query = (
"description AND background AND work AND achievements "
f'{next((f for f in search.filters if f.startswith("intext:")), None)}'
)
case "weather":
if any((f.find("weather:") >= 0 for f in search.filters)):
final_query = f' weather:"{next((f for f in search.filters if f.startswith("weather:")), None)}"'
final_query += " AND forecast"
# Gather the sites to be used in the search.
sites = f"{' OR '.join(set('site:' + url for url in search.sites))}"
# Make the final search query use the provided keywords.
if search.keywords:
final_query = f"{' '.join(set(search.keywords))} {sites} {final_query}"
google_query = (
f'{now("%B %d %Y")} {next((f for f in search.filters if f.startswith("weather:")), None)}'
)
case _:
if search.keywords:
# Gather the sites to be used in the search.
sites = f"{' OR '.join(set('site:' + url for url in search.sites))}"
google_query = f"{' '.join(set(sorted(search.keywords)))} {sites}"

return final_query
return google_query

def __init__(self):
API_KEYS.ensure("GOOGLE_API_KEY", "google_search")
self._google = GoogleSearchAPIWrapper(google_api_key=API_KEYS.GOOGLE_API_KEY)
self._google = GoogleSearchAPIWrapper(k=10, google_api_key=API_KEYS.GOOGLE_API_KEY)
self._tool = Tool(name="google_search", description="Search Google for recent results.", func=self._google.run)
self._text_splitter = RecursiveCharacterTextSplitter(
chunk_size=configs.chunk_size, chunk_overlap=configs.chunk_overlap
Expand All @@ -159,21 +165,50 @@ def google_search(self, search: SearchResult) -> str:
events.reply.emit(reply=AIReply.info(msg.searching()))
search.sites = search.sites or ["google.com", "bing.com", "duckduckgo.com", "ask.com"]
terms: str = self._build_google_query(search).strip()
question: str = re.sub(r"(\w+:)*|((\w+\.\w+)*)", "", terms, flags=re.DOTALL | re.MULTILINE)
try:
log.info("Searching Google for '%s'", terms)
events.reply.emit(reply=AIReply.debug(msg.final_query(terms)))
ctx = str(self._tool.run(terms))
llm_prompt = ChatPromptTemplate.from_messages([("system", "{query}\n\n{context}")])
context: List[Document] = [Document(ctx)]
results: list[str] = str(self._tool.run(terms, verbose=configs.is_debug)).split(" ")
llm_prompt = ChatPromptTemplate.from_messages(
[
("system", "Use the following context to answer the question at the end:\\n\\n{context}"),
("human", "{question}"),
]
)
docs: List[Document] = [Document(d) for d in results]
chain = create_stuff_documents_chain(
lc_llm.create_chat_model(temperature=Temperature.CREATIVE_WRITING.temp), llm_prompt
lc_llm.create_chat_model(temperature=Temperature.COLDEST.temp), llm_prompt
)
output = chain.invoke({"query": search.question, "context": context})
except HttpError as err:
output = chain.invoke({"question": question, "context": docs})
except (HttpError, openai.APIError) as err:
return msg.fail_to_search(str(err))

return self.refine_search(terms, output, search)

def refine_search(self, terms: str, response: str, search: SearchResult) -> str:
"""Refine the text retrieved by the search engine.
:param terms: The search terms used in the search.
:param response: The internet search response.
:param search: The search result object.
:return: A refined version of the search result text, tailored to better answer the user's question.
"""
refine_prompt = PromptTemplate.from_template(self.refine_template).format(
idiom=shared.idiom,
sources=search.sites,
location=geo_location.location,
datetime=geo_location.datetime,
result=response,
question=search.question,
)
log.info("STT::[QUESTION] '%s'", response)
llm = lc_llm.create_chat_model(temperature=Temperature.CREATIVE_WRITING.temp)

if (response := llm.invoke(refine_prompt)) and (output := response.content):
return self.wrap_response(terms, output, search)

return msg.no_good_result()

def scrap_sites(self, search: SearchResult) -> str:
"""Scrape a web page and summarize its contents.
:param search: The AI search parameters encapsulated in a SearchResult object.
Expand Down Expand Up @@ -210,28 +245,5 @@ def _format_docs(docs) -> str:
return self.refine_search(search.question, str(output), search)
return msg.no_output("search")

def refine_search(self, terms: str, response: str, search: SearchResult) -> str:
"""Refine the text retrieved by the search engine.
:param terms: The search terms used in the search.
:param response: The internet search response.
:param search: The search result object.
:return: A refined version of the search result text, tailored to better answer the user's question.
"""
refine_prompt = PromptTemplate.from_template(self.refine_template).format(
idiom=shared.idiom,
sources=search.sites,
location=geo_location.location,
datetime=geo_location.datetime,
result=response,
question=search.question,
)
log.info("STT::[QUESTION] '%s'", response)
llm = lc_llm.create_chat_model(temperature=Temperature.CREATIVE_WRITING.temp)

if (response := llm.invoke(refine_prompt)) and (output := response.content):
return self.wrap_response(terms, output, search)

return msg.no_good_result()


assert (internet := InternetService().INSTANCE) is not None
22 changes: 13 additions & 9 deletions src/main/askai/core/enums/acc_color.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ class AccColor(Enumeration):

INTERRUPT = 'Black', -1

EXCELLENT = 'Blue', 0
TERMINATE = 'White', 0

GOOD = 'Green', 1
EXCELLENT = 'Blue', 1

MODERATE = 'Yellow', 2
GOOD = 'Green', 2

INCOMPLETE = 'Orange', 3
MODERATE = 'Yellow', 3

BAD = 'Red', 4

Expand Down Expand Up @@ -80,9 +80,17 @@ def val(self) -> int:
"""
return int(self.value[1])

@property
def is_terminate(self) -> bool:
return self == self.TERMINATE

@property
def is_interrupt(self) -> bool:
return self == self.INTERRUPT

@property
def is_bad(self) -> bool:
return self in [self.BAD, self.INCOMPLETE]
return self == self.BAD

@property
def is_moderate(self) -> bool:
Expand All @@ -92,10 +100,6 @@ def is_moderate(self) -> bool:
def is_good(self) -> bool:
return self in [self.GOOD, self.EXCELLENT]

@property
def is_interrupt(self) -> bool:
return self == self.INTERRUPT

def passed(self, threshold: "AccColor") -> bool:
"""Determine whether the response matches a 'PASS' classification.
:param threshold: The threshold or criteria used to determine a 'PASS' classification.
Expand Down
2 changes: 1 addition & 1 deletion src/main/askai/core/enums/router_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class RouterMode(Enumeration):

RAG = "Retrieval-Augmented-Generation", "", rag

CHAT = "Taius Chat", "", chat
CHAT = "Taius Chat", "", chat

# fmt: on

Expand Down
2 changes: 1 addition & 1 deletion src/main/askai/core/features/processors/task_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def _splitter_wrapper_() -> Optional[str]:
try:
agent_output = self._process_tasks(task_list)
acc_response: AccResponse = assert_accuracy(question, agent_output, AccColor.MODERATE)
except (InterruptionRequest, TerminatingQuery) as err:
except InterruptionRequest as err:
return str(err)
except self.RETRIABLE_ERRORS:
events.reply.emit(reply=AIReply.error(msg.sorry_retry()))
Expand Down
5 changes: 4 additions & 1 deletion src/main/askai/core/features/router/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from askai.core.support.langchain_support import lc_llm
from askai.core.support.rag_provider import RAGProvider
from askai.core.support.shared_instances import shared
from askai.exception.exceptions import InaccurateResponse, InterruptionRequest
from askai.exception.exceptions import InaccurateResponse, InterruptionRequest, TerminatingQuery
from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
from langchain_core.runnables.history import RunnableWithMessageHistory
Expand Down Expand Up @@ -71,6 +71,9 @@ def assert_accuracy(question: str, ai_response: str, pass_threshold: AccColor =
# AI flags that it can't continue interacting.
log.warning(msg.interruption_requested(output))
raise InterruptionRequest(ai_response)
elif acc.is_terminate:
# AI flags that the user wants to end the session.
raise TerminatingQuery(ai_response)
elif not acc.is_pass(pass_threshold):
# Include the guidelines for the first mistake.
if not shared.context.get("EVALUATION"):
Expand Down
4 changes: 4 additions & 0 deletions src/main/askai/core/model/acc_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ def details(self) -> str:
def is_interrupt(self) -> bool:
return self.acc_color.is_interrupt

@property
def is_terminate(self) -> bool:
return self.acc_color.is_terminate

def is_pass(self, threshold: AccColor) -> bool:
"""Determine whether the response matches a 'PASS' classification.
:param threshold: The threshold or criteria used to determine a 'PASS' classification.
Expand Down
10 changes: 5 additions & 5 deletions src/main/askai/core/support/text_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class TextFormatter(metaclass=Singleton):
RE_MD_CODE_BLOCK = r"(```.+```)"

CHAT_ICONS = {
"": " Oops! %NC%",
"": "%RED% Oops! %NC%",
"": "\n>  *Tip:* ",
"": "\n>  *Analysis:* ",
"": "\n>  *Summary:* ",
Expand All @@ -53,7 +53,7 @@ class TextFormatter(metaclass=Singleton):
RE_TYPES = {
"MD": RE_MD_CODE_BLOCK,
"": RE_URL,
"": r"[\s*_]*Errors?[_*-:\s]+",
"": r"[\s*_]*Errors?[_*-:\s]+",
"": r"[\s*_]*Hints?( ([Aa]nd|&) [Tt]ips?)?[_*-:\s]+",
"": r"[\s*_]*Analysis[_*-:\s]+",
"": r"[\s*_]*Summary[_*-:\s]+",
Expand Down Expand Up @@ -110,7 +110,7 @@ def beautify(self, text: Any) -> str:
# fmt: off

text = dedent(str(text)).strip()
text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text)
text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text)
text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text)
text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text)
text = re.sub(self.RE_TYPES[''], self.CHAT_ICONS[''], text)
Expand All @@ -121,8 +121,8 @@ def beautify(self, text: Any) -> str:
text = re.sub(self.RE_TYPES[''], r" [\1](\1)", text)
text = re.sub(self.RE_TYPES['MD'], r"\n\1\n", text)
text = re.sub(r'```(.+)```\s+', r"\n```\1```\n", text)
text = text.replace(os.getenv("USER", "user"), f'`{os.getenv("USER", "user")}`')
text = text.replace("Taius", f'**Taius**')
text = re.sub(rf"\s+{os.getenv('USER', 'user')}", f'` {os.getenv("USER", "user")}`', text)
text = re.sub(r"\s+Taius", f' **Taius**', text)

# fmt: on

Expand Down
37 changes: 26 additions & 11 deletions src/main/askai/resources/prompts/evaluation.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,38 @@ Avoid using markdown or any special formatting in your response.

Use the following criteria for classification:

1. **Blue**: Level of accuracy [100%-95%]. Reasoning: The AI response is perfect in responding to the question posed, including detailed and accurate information.
1. **Black:** Level of accuracy [0%-100%]. Reasoning: The AI responds that it cannot continue with further interactions due to a specified reason.

1. **Green**: Level of accuracy [94%-70%]. Reasoning: The AI response successfully addresses the question posed, indicating a full understanding and appropriate analysis.
2. **White:** Level of accuracy [0%-100%]. Reasoning: Only used when the user has clearly requested to end the session; this is the code that will cause the application to exit.

2. **Yellow**: Level of accuracy [69%-50%]. Reasoning: The AI response partially addresses the question but lacks full depth or detail, suggesting moderate understanding.
3. **Blue:** Level of accuracy [100%-95%]. Reasoning: The AI response is perfect in responding to the question posed, including detailed and accurate information.

3. **Orange**: Level of accuracy [49%-30%]. Reasoning: The AI response is incomplete or if you have low confidence of the classification.
4. **Green:** Level of accuracy [94%-70%]. Reasoning: The AI response successfully addresses the question posed, indicating a full understanding and appropriate analysis.

4. **Red**: Level of accuracy [29%-0%]. Reasoning: The AI response fails to adequately address the question, indicating a misunderstanding or incorrect analysis.
5. **Yellow:** Level of accuracy [69%-50%]. Reasoning: The AI response partially addresses the question but lacks full depth or detail, suggesting moderate understanding.

5. **Black**: Level of accuracy [Any]. Reasoning: The AI responds that it cannot continue with further interactions due to a specified reason.
6. **Red:** Level of accuracy [29%-0%]. Reasoning: The AI response fails to adequately address the question, indicating a misunderstanding or incorrect analysis.


**When evaluating responses, classify 'Black' (Interrupt Responses) when:**
**When evaluating responses, classify 'Black' when:**

- The response explains the lack of information, context, or when the AI is clearly having trouble understanding the user input.
- If the response indicates a lack of information or context, ask clarifying questions to gather more details.

- The response is a negative answer to the question.
- When the AI struggles to understand user input, guide the conversation by requesting clarification, examples, or rephrasing of the question.

- If more context is needed, ask the user to expand or provide specific information to ensure an accurate response.

**When evaluating responses, classify 'Green' or 'Blue' (Known Good/Excellent Responses) when:**
- When the AI responds "negatively" to the question.

- When providing search results or insights, focus on neutral and helpful responses rather than stating that no information is available.


**When evaluating responses, classify 'White' when:**

- The user intends to terminate the session.


**When evaluating responses, classify 'Green' or 'Blue' when:**

- 'Blue' if it detects a successful command execution, e.g., "OK, command <any command> succeeded". The "command output" can be disregarded for this classification.

Expand All @@ -43,7 +54,7 @@ Use the following criteria for classification:
- Regardless of the question, if the response includes the phrase: "search has returned the following results:".


**When evaluating responses, classify 'Red' (Known Bad Responses) when:**
**When evaluating responses, classify 'Red' when:**

- The response fails to resolve the primary goal; classify 'Yellow' if the response fails to resolve any of the sub-goals.

Expand All @@ -59,8 +70,12 @@ as 'Red' if it does not align with known facts.
- The AI is seeking user assistance.


<<ATTENTION>> If a classification has already been determined (i.e., if any of the above criteria have been met), return it to the user immediately.


**Classification Guidelines (rate from 0% to 100%):**


- Assess the AI's response for correctness by considering its ability to effectively address and correct syntax errors or misinterpretations in the user's input, rather than focusing solely on literal repetitions or minor discrepancies in terminology.

- Classify AI responses that contain irrelevant information as 'Yellow' instead of 'Red', since the additional information still has value.
Expand Down
Loading

0 comments on commit 7153aaa

Please sign in to comment.