From 626fba5d32ce035b5c9e5b5df70a49324eee0491 Mon Sep 17 00:00:00 2001 From: Hugo Saporetti Junior Date: Sun, 10 Mar 2024 03:45:33 -0300 Subject: [PATCH] Add internet processor --- docs/devel/snippets/snippet-3-internet.txt | 16 +++++ src/main/askai/core/askai.py | 42 ++++++------- .../askai/core/component/cache_service.py | 2 +- .../askai/core/component/internet_service.py | 48 ++++++++++++++ src/main/askai/core/model/chat_context.py | 2 +- .../askai/core/model/internet_research.py | 42 ------------- src/main/askai/core/model/search_result.py | 15 +++++ src/main/askai/core/processor/ai_processor.py | 16 +++-- .../askai/core/processor/command_processor.py | 3 + .../askai/core/processor/generic_processor.py | 22 ++++--- .../core/processor/internet_processor.py | 63 +++++++++++++++++++ .../askai/core/processor/processor_proxy.py | 6 +- src/main/askai/core/support/object_mapper.py | 1 - .../askai/resources/application.properties | 2 +- .../assets/personas/internet-persona.txt | 2 +- .../assets/prompts/internet-prompt.txt | 4 +- 16 files changed, 198 insertions(+), 88 deletions(-) create mode 100644 docs/devel/snippets/snippet-3-internet.txt create mode 100644 src/main/askai/core/component/internet_service.py delete mode 100644 src/main/askai/core/model/internet_research.py create mode 100644 src/main/askai/core/model/search_result.py create mode 100644 src/main/askai/core/processor/internet_processor.py diff --git a/docs/devel/snippets/snippet-3-internet.txt b/docs/devel/snippets/snippet-3-internet.txt new file mode 100644 index 00000000..8ac5b717 --- /dev/null +++ b/docs/devel/snippets/snippet-3-internet.txt @@ -0,0 +1,16 @@ +if __name__ == '__main__': + from googleapiclient.discovery import build + import pprint + + my_api_key = os.environ.get("GOOGLE_API_KEY") + my_cse_id = os.environ.get("GOOGLE_CSE_ID") + + def google_search(search_term, api_key, cse_id, **kwargs): + service = build("customsearch", "v1", developerKey=api_key) + res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute() + return res['items'] + + results = google_search( + 'stackoverflow site:en.wikipedia.org', my_api_key, my_cse_id, num=10) + for result in results: + pprint.pprint(result) diff --git a/src/main/askai/core/askai.py b/src/main/askai/core/askai.py index a0f6eeed..06790815 100644 --- a/src/main/askai/core/askai.py +++ b/src/main/askai/core/askai.py @@ -35,14 +35,14 @@ from askai.core.askai_messages import msg from askai.core.askai_prompt import prompt from askai.core.component.audio_player import AudioPlayer -from askai.core.component.cache_service import CacheService -from askai.core.support.object_mapper import ObjectMapper +from askai.core.component.cache_service import cache from askai.core.component.recorder import recorder from askai.core.engine.ai_engine import AIEngine from askai.core.model.chat_context import ChatContext from askai.core.model.query_response import QueryResponse from askai.core.processor.ai_processor import AIProcessor from askai.core.processor.processor_proxy import proxy +from askai.core.support.object_mapper import object_mapper from askai.core.support.shared_instances import shared from askai.core.support.utilities import display_text @@ -88,7 +88,7 @@ def __str__(self) -> str: f"{'--' * 40} %EOL%" f"Interactive: ON %EOL%" f" Speaking: {'ON' if self.is_speak else 'OFF'}{device_info} %EOL%" - f" Caching: {'ON' if CacheService.is_cache_enabled() else 'OFF'} %EOL%" + f" Caching: {'ON' if cache.is_cache_enabled() else 'OFF'} %EOL%" f" Tempo: {configs.tempo} %EOL%" f"{'--' * 40} %EOL%%NC%" ) @@ -186,8 +186,8 @@ def _startup(self) -> None: splash_thread.start() if configs.is_speak: AudioPlayer.INSTANCE.start_delay() - CacheService.set_cache_enable(self.cache_enabled) - CacheService.read_query_history() + cache.set_cache_enable(self.cache_enabled) + cache.read_query_history() askai_bus = AskAiEvents.get_bus(ASKAI_BUS_NAME) askai_bus.subscribe(REPLY_EVENT, self._cb_reply_event) self._ready = True @@ -229,7 +229,7 @@ def _ask_and_reply(self, question: str) -> bool: """Ask the question and provide the reply. :param question: The question to ask to the AI engine. """ - if not (reply := CacheService.read_reply(question)): + if not (reply := cache.read_reply(question)): log.debug('Response not found for "%s" in cache. Querying from %s.', question, self.engine.nickname()) status, response = proxy.process(question) if status: @@ -251,28 +251,24 @@ def _process_response(self, proxy_response: QueryResponse) -> bool: elif proxy_response.terminating: log.info("User wants to terminate the conversation.") return False - elif proxy_response.require_internet: - log.info("Internet is required to fulfill the request.") - pass if q_type := proxy_response.query_type: - processor: AIProcessor = AIProcessor.get_by_query_type(q_type) - if not processor: + if not (processor := AIProcessor.get_by_query_type(q_type)): log.error(f"Unable to find a proper processor for query type: {q_type}") self.reply_error(str(proxy_response)) - else: - log.info("%s::Processing response for '%s'", processor, proxy_response.question) - status, output = processor.process(proxy_response) - if status and processor.next_in_chain(): - mapped_response = ObjectMapper.INSTANCE.of_json(output, QueryResponse) - if isinstance(mapped_response, QueryResponse): - self._process_response(mapped_response) - else: - self.reply(str(mapped_response)) - elif status: - self.reply(str(output)) + return False + log.info("%s::Processing response for '%s'", processor, proxy_response.question) + status, output = processor.process(proxy_response) + if status and processor.next_in_chain(): + mapped_response = object_mapper.of_json(output, QueryResponse) + if isinstance(mapped_response, QueryResponse): + self._process_response(mapped_response) else: - self.reply_error(str(output)) + self.reply(str(mapped_response)) + elif status: + self.reply(str(output)) + else: + self.reply_error(str(output)) else: self.reply_error(msg.invalid_response(proxy_response)) diff --git a/src/main/askai/core/component/cache_service.py b/src/main/askai/core/component/cache_service.py index db19fa3d..2ff36cce 100644 --- a/src/main/askai/core/component/cache_service.py +++ b/src/main/askai/core/component/cache_service.py @@ -104,4 +104,4 @@ def get_audio_file(cls, text: str, audio_format: str = "mp3") -> Tuple[str, bool return audio_file_path, file_is_not_empty(audio_file_path) -assert CacheService().INSTANCE is not None +assert (cache := CacheService().INSTANCE) is not None diff --git a/src/main/askai/core/component/internet_service.py b/src/main/askai/core/component/internet_service.py new file mode 100644 index 00000000..b3233df5 --- /dev/null +++ b/src/main/askai/core/component/internet_service.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" + @project: HsPyLib-AskAI + @package: askai.utils + @file: cache_service.py + @created: Tue, 16 Jan 2024 + @author: Hugo Saporetti Junior" + @site: https://github.com/yorevs/hspylib + @license: MIT - Please refer to + + Copyright·(c)·2024,·HSPyLib +""" +import logging as log +import os +from typing import List, Optional + +from hspylib.core.metaclass.singleton import Singleton +from langchain_community.utilities import GoogleSearchAPIWrapper +from langchain_core.tools import Tool + + +class InternetService(metaclass=Singleton): + """Provide a internet search service used to complete queries that require realtime data.ß""" + + INSTANCE: 'InternetService' = None + + ASKAI_INTERNET_DATA_KEY = "askai-internet-data" + + def __init__(self): + self._search = GoogleSearchAPIWrapper() + self._tool = Tool( + name="google_search", description="Search Google for recent results.", func=self._search.run, + ) + + def _top_results(self, query: str, max_results: int = 5) -> List[str]: + """TODO""" + return self._search.results(query, max_results) + + def search(self, query: str) -> Optional[str]: + """TODO""" + search_results = self._tool.run(query) + log.debug(f"Internet search returned: %s", search_results) + return os.linesep.join(search_results) if isinstance(search_results, list) else search_results + + +assert (internet := InternetService().INSTANCE) is not None diff --git a/src/main/askai/core/model/chat_context.py b/src/main/askai/core/model/chat_context.py index f04d3723..ddde80c3 100644 --- a/src/main/askai/core/model/chat_context.py +++ b/src/main/askai/core/model/chat_context.py @@ -73,7 +73,7 @@ def get_many(self, *keys: str) -> List[dict]: for key in keys: if (content := self.get(key)) and (token_length + len(content)) > self._token_limit: raise TokenLengthExceeded(f"Required token length={token_length} limit={self._token_limit}") - context += content + context += content or '' return context def clear(self, key: str) -> int: diff --git a/src/main/askai/core/model/internet_research.py b/src/main/askai/core/model/internet_research.py deleted file mode 100644 index 50e61b38..00000000 --- a/src/main/askai/core/model/internet_research.py +++ /dev/null @@ -1,42 +0,0 @@ -import os -from dataclasses import dataclass -from typing import List - - -@dataclass -class InternetResearch: - """Keep track of the internet search responses.""" - - keywords: List[str] - urls: List[str] - results: List[str] - - -if __name__ == '__main__': - from langchain_community.utilities import GoogleSearchAPIWrapper - from langchain_core.tools import Tool - - search = GoogleSearchAPIWrapper() - - tool = Tool( - name="google_search", - description="Search Google for recent results.", - func=search.run, - ) - - print(tool.run("Obama's first name?")) - # from googleapiclient.discovery import build - # import pprint - # - # my_api_key = os.environ.get("GOOGLE_API_KEY") - # my_cse_id = os.environ.get("GOOGLE_CSE_ID") - # - # def google_search(search_term, api_key, cse_id, **kwargs): - # service = build("customsearch", "v1", developerKey=api_key) - # res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute() - # return res['items'] - # - # results = google_search( - # 'stackoverflow site:en.wikipedia.org', my_api_key, my_cse_id, num=10) - # for result in results: - # pprint.pprint(result) diff --git a/src/main/askai/core/model/search_result.py b/src/main/askai/core/model/search_result.py new file mode 100644 index 00000000..d4ecdfea --- /dev/null +++ b/src/main/askai/core/model/search_result.py @@ -0,0 +1,15 @@ +import json +from dataclasses import dataclass +from typing import List + + +@dataclass +class SearchResult: + """Keep track of the internet search responses.""" + + query: str = None + urls: str | List[str] = None + results: str = None + + def __str__(self): + return f"Internet search results: {json.dumps(self.__dict__, default=lambda obj: obj.__dict__)}" diff --git a/src/main/askai/core/processor/ai_processor.py b/src/main/askai/core/processor/ai_processor.py index 2cedf873..4f0d3dad 100644 --- a/src/main/askai/core/processor/ai_processor.py +++ b/src/main/askai/core/processor/ai_processor.py @@ -43,9 +43,10 @@ def find_query_types(cls) -> str: proc_name = os.path.splitext(proc)[0] proc_pkg = import_module(f"{__package__}.{proc_name}") proc_class = getattr(proc_pkg, camelcase(proc_name, capitalized=True)) - proc_inst = proc_class() + proc_inst: 'AIProcessor' = proc_class() cls._PROCESSORS[proc_inst.processor_id()] = proc_inst - q_types.append(str(proc_inst)) + if proc_inst.query_desc(): + q_types.append(str(proc_inst)) return os.linesep.join(q_types) @classmethod @@ -67,6 +68,7 @@ def get_by_name(cls, name: str) -> Optional['AIProcessor']: def __init__(self, template_file: str | Path, persona_file: str | Path): self._template_file = str(template_file) self._persona_file = str(persona_file) + self._next_in_chain = None def __str__(self): return f"'{self.query_type()}': {self.query_desc()}" @@ -87,18 +89,22 @@ def processor_id(self) -> str: def query_type(self) -> str: """Get the query type this processor can handle. By default, it's the name of the processor itself.""" - return self.name + return self.processor_id() def query_desc(self) -> str: """TODO""" - ... + return '' def template(self) -> str: return prompt.read_prompt(self._template_file, self._persona_file) def next_in_chain(self) -> Optional['AIProcessor']: """Return the next processor in the chain to call. Defaults to None.""" - return None + return self._next_in_chain + + def bind(self, next_in_chain: 'AIProcessor'): + """Bind a processor to be the next in chain.""" + self._next_in_chain = next_in_chain def process(self, query_response: QueryResponse) -> Tuple[bool, Optional[str]]: """TODO""" diff --git a/src/main/askai/core/processor/command_processor.py b/src/main/askai/core/processor/command_processor.py index 96ef2c29..5d8e626e 100644 --- a/src/main/askai/core/processor/command_processor.py +++ b/src/main/askai/core/processor/command_processor.py @@ -44,6 +44,9 @@ def query_desc(self) -> str: "file, folder and application management, listing, device assessment or inquiries." ) + def bind(self, next_in_chain: 'AIProcessor'): + pass # Avoid re-binding the next in chain processor. + def next_in_chain(self) -> AIProcessor: return AIProcessor.get_by_name(OutputProcessor.__name__) diff --git a/src/main/askai/core/processor/generic_processor.py b/src/main/askai/core/processor/generic_processor.py index f1e83800..a50131e4 100644 --- a/src/main/askai/core/processor/generic_processor.py +++ b/src/main/askai/core/processor/generic_processor.py @@ -19,9 +19,10 @@ from askai.core.askai_messages import msg from askai.core.askai_prompt import prompt -from askai.core.component.cache_service import CacheService +from askai.core.component.cache_service import cache from askai.core.model.query_response import QueryResponse from askai.core.processor.ai_processor import AIProcessor +from askai.core.processor.internet_processor import InternetProcessor from askai.core.support.shared_instances import shared @@ -40,23 +41,28 @@ def query_desc(self) -> str: def process(self, query_response: QueryResponse) -> Tuple[bool, Optional[str]]: status = False output = None - template = PromptTemplate( - input_variables=['user'], template=self.template()) - final_prompt: str = msg.translate( - template.format(user=prompt.user)) + template = PromptTemplate(input_variables=['user'], template=self.template()) + final_prompt: str = msg.translate(template.format(user=prompt.user)) shared.context.set("SETUP", final_prompt, 'system') shared.context.set("QUESTION", query_response.question) context: List[dict] = shared.context.get_many("GENERAL", "SETUP", "QUESTION") log.info("Setup::[GENERIC] '%s' context=%s", query_response.question, context) try: + if query_response.require_internet: + log.info("Internet is required to fulfill the request.") + i_processor = AIProcessor.get_by_name(InternetProcessor.__name__) + status, output = i_processor.process(query_response) + i_ctx = shared.context.get("INTERNET") + list(map(lambda c: context.insert(len(context) - 2, c), i_ctx)) if (response := shared.engine.ask(context, temperature=1, top_p=1)) and response.is_success: output = response.message - CacheService.save_reply(query_response.question, query_response.question) shared.context.push("GENERAL", output, 'assistant') - CacheService.save_reply(query_response.question, output) - CacheService.save_query_history() + cache.save_reply(query_response.question, output) + cache.save_query_history() status = True else: output = msg.llm_error(response.message) + except Exception as err: + output = msg.llm_error(str(err)) finally: return status, output diff --git a/src/main/askai/core/processor/internet_processor.py b/src/main/askai/core/processor/internet_processor.py new file mode 100644 index 00000000..507c6f69 --- /dev/null +++ b/src/main/askai/core/processor/internet_processor.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" + @project: HsPyLib-AskAI + @package: askai.core.processor + @file: generic_processor.py + @created: Fri, 23 Feb 2024 + @author: Hugo Saporetti Junior" + @site: https://github.com/yorevs/hspylib + @license: MIT - Please refer to + + Copyright·(c)·2024,·HSPyLib +""" +import logging as log +from typing import Tuple, Optional, List + +from langchain_core.prompts import PromptTemplate + +from askai.core.askai_messages import msg +from askai.core.askai_prompt import prompt +from askai.core.component.cache_service import cache +from askai.core.component.internet_service import internet +from askai.core.model.query_response import QueryResponse +from askai.core.model.search_result import SearchResult +from askai.core.processor.ai_processor import AIProcessor +from askai.core.support.object_mapper import object_mapper +from askai.core.support.shared_instances import shared + + +class InternetProcessor(AIProcessor): + """Process generic prompts.""" + + def __init__(self): + super().__init__('internet-prompt', 'internet-persona') + + def process(self, query_response: QueryResponse) -> Tuple[bool, Optional[str]]: + status = False + output = None + template = PromptTemplate(input_variables=['user'], template=self.template()) + final_prompt: str = msg.translate(template.format(user=prompt.user)) + shared.context.set("SETUP", final_prompt, 'system') + shared.context.set("QUESTION", query_response.question) + context: List[dict] = shared.context.get_many("SETUP", "QUESTION") + log.info("Setup::[INTERNET] '%s' context=%s", query_response.question, context) + try: + if not (response := cache.read_reply(query_response.question)): + if (response := shared.engine.ask(context, temperature=0.0, top_p=0.0)) and response.is_success: + search_result: SearchResult = object_mapper.of_json(response.message, SearchResult) + if results := internet.search(search_result.query): + search_result.results = results + output = str(search_result) + shared.context.set("INTERNET", output, 'assistant') + cache.save_reply(query_response.question, output) + status = True + else: + output = msg.llm_error(response.message) + else: + log.debug('Reply found for "%s" in cache.', query_response.question) + output = response + status = True + finally: + return status, output diff --git a/src/main/askai/core/processor/processor_proxy.py b/src/main/askai/core/processor/processor_proxy.py index 60569b51..bf9c267f 100644 --- a/src/main/askai/core/processor/processor_proxy.py +++ b/src/main/askai/core/processor/processor_proxy.py @@ -21,9 +21,9 @@ from askai.core.askai_messages import msg from askai.core.askai_prompt import prompt -from askai.core.support.object_mapper import ObjectMapper from askai.core.model.query_response import QueryResponse from askai.core.processor.ai_processor import AIProcessor +from askai.core.support.object_mapper import object_mapper from askai.core.support.shared_instances import shared @@ -51,14 +51,14 @@ def process(self, question: str) -> Tuple[bool, QueryResponse]: template = PromptTemplate( input_variables=[], template=self.template ) - final_prompt = template.format(query_types=self.query_types) + final_prompt = msg.translate(template.format(query_types=self.query_types)) shared.context.set("SETUP", final_prompt, 'system') shared.context.set("QUESTION", question) context: List[dict] = shared.context.get_many("CONTEXT", "SETUP", "QUESTION") log.info("Ask::[QUESTION] '%s' context=%s", question, context) if (response := shared.engine.ask(context, temperature=0.0, top_p=0.0)) and response.is_success: log.info('Ask::[PROXY] Received from AI: %s.', str(response)) - output = ObjectMapper.INSTANCE.of_json(response.message, QueryResponse) + output = object_mapper.of_json(response.message, QueryResponse) if not isinstance(output, QueryResponse): log.error(msg.invalid_response(output)) output = QueryResponse(question=question, terminating=True, response=response.message) diff --git a/src/main/askai/core/support/object_mapper.py b/src/main/askai/core/support/object_mapper.py index e9a061e6..2471c173 100644 --- a/src/main/askai/core/support/object_mapper.py +++ b/src/main/askai/core/support/object_mapper.py @@ -19,7 +19,6 @@ from hspylib.core.enums.enumeration import Enumeration from hspylib.core.metaclass.singleton import Singleton -from askai.core.model.query_response import QueryResponse from askai.exception.exceptions import InvalidJsonMapping, InvalidMapping FnConverter: TypeAlias = Callable[[Any, Type], Any] diff --git a/src/main/askai/resources/application.properties b/src/main/askai/resources/application.properties index 988af850..fa0f0e83 100644 --- a/src/main/askai/resources/application.properties +++ b/src/main/askai/resources/application.properties @@ -1,4 +1,4 @@ -askai.cache.enabled = True +askai.cache.enabled = False askai.speech.tempo = 1 askai.speak.response = False diff --git a/src/main/askai/resources/assets/personas/internet-persona.txt b/src/main/askai/resources/assets/personas/internet-persona.txt index b3dd071d..7d2d51e2 100644 --- a/src/main/askai/resources/assets/personas/internet-persona.txt +++ b/src/main/askai/resources/assets/personas/internet-persona.txt @@ -1 +1 @@ -You are 'Taius', the AskAI assistant. Act as a means of internet crawler and researcher. Your role is to identify the 'keywords' and 'URLs' required to fulfill the question. +You are 'Taius', the AskAI assistant. Act as a means of internet crawler and researcher. diff --git a/src/main/askai/resources/assets/prompts/internet-prompt.txt b/src/main/askai/resources/assets/prompts/internet-prompt.txt index 8bd90e03..8e886d83 100644 --- a/src/main/askai/resources/assets/prompts/internet-prompt.txt +++ b/src/main/askai/resources/assets/prompts/internet-prompt.txt @@ -2,8 +2,8 @@ ${persona} Before responding to the user, you must follow the step-by-step instructions provided below in sequential order: -1. Determine the required keywords for a good search for the answer. +1. Craft a well-structured query string to effectively search for the desired answer. 2. Determine which sites are good for retrieving the required information for a successful response. 3. Generate a JSON response containing the designated fields. -4. The final response 'JSON' must contain the fields: 'keywords', 'urls'. +4. The final response 'JSON' must contain the fields: 'query' and 'urls'. 5. The final response is a formatted JSON with no additional description or context.