diff --git a/.gitignore b/.gitignore index 72a4da3..92aefdd 100644 --- a/.gitignore +++ b/.gitignore @@ -161,3 +161,6 @@ cython_debug/ # Testing purpose test.* + +# Temporary files. +temp/voice_*.* \ No newline at end of file diff --git a/bot/routes.py b/bot/routes.py index a84c79a..8ce2472 100644 --- a/bot/routes.py +++ b/bot/routes.py @@ -3,8 +3,7 @@ from typing import Any, Dict, Union from aiogram import Router from aiogram.filters import CommandStart -from aiogram.types import Message -from aiogram.enums import ParseMode +from aiogram.types import Message, InputMediaAudio from aiogram.exceptions import TelegramBadRequest from aiogram.utils.markdown import bold, italic, pre @@ -63,6 +62,11 @@ async def echo_handler(message: Message, repo: ChatRepo, prompts: list[Union[str await sent.delete() sent = None await message.reply_media_group(media=reply) + elif isinstance(reply, InputMediaAudio): + if sent: + await sent.delete() + sent = None + await message.reply_voice(voice=reply.media) elif sent: response = response + reply error = None diff --git a/chat/query_processor.py b/chat/query_processor.py index 1f0c798..8aea619 100644 --- a/chat/query_processor.py +++ b/chat/query_processor.py @@ -1,21 +1,31 @@ import asyncio from logging import Logger, getLogger -import aiohttp -from aiogram.types import InputMediaPhoto, BufferedInputFile +import pyttsx3 +from aiogram.types import InputMediaPhoto, InputMediaAudio, FSInputFile from duckduckgo_search import AsyncDDGS from google.generativeai.generative_models import ChatSession, content_types +import time from chat.service import ChatService -from prompts.keywords import IMAGE_QUERY, SEARCH_QUERIES +from prompts.keywords import IMAGE_QUERY, SEARCH_QUERIES, VOICE_RESPONSE from prompts.templates import build_searchengine_response_prompt logging: Logger = getLogger(__name__) class QueryProcessor(): __service: ChatService + __voice_engine: pyttsx3.Engine + __query_list__ = [ + IMAGE_QUERY, + SEARCH_QUERIES, + VOICE_RESPONSE + ] def __init__(self, service: ChatService): self.__service = service + self.__voice_engine = pyttsx3.init() + self.__voice_engine.setProperty('voice', self.__voice_engine.getProperty('voices')[1].id) + self.__voice_engine.setProperty('rate', 140) async def __process_searchengine_query__(self, query: str): async with AsyncDDGS() as ddgs: @@ -48,7 +58,14 @@ async def __gen_image_data__(self, query: str): return images - async def process_response(self, session: ChatSession, messages: list[content_types.PartType]): + async def __gen_voice_data__(self, query: str, chat_id: int): + logging.debug(f"Generate voice query: {query}") + file_path = f"temp/voice_{chat_id}_{int(time.time())}.mp3" + self.__voice_engine.save_to_file(query, file_path) + self.__voice_engine.runAndWait() + return InputMediaAudio(media=FSInputFile(file_path)) + + async def process_response(self, session: ChatSession, messages: list[content_types.PartType], chat_id: int): text = "" has_query = False response_stream = self.__service.gen_response_stream(prompts=messages, chat=session) @@ -56,7 +73,7 @@ async def process_response(self, session: ChatSession, messages: list[content_ty async for res in response_stream: text += res if len(text) > 15: - if text.startswith(f"{SEARCH_QUERIES}:") or text.startswith(f"{IMAGE_QUERY}:"): + if len([query for query in self.__query_list__ if text.startswith(f"{query}:")]) > 0: has_query = True else: yield text @@ -69,9 +86,12 @@ async def process_response(self, session: ChatSession, messages: list[content_ty if text.startswith(f"{IMAGE_QUERY}:"): query = text.replace(f"{IMAGE_QUERY}:", "").strip() yield await self.__gen_image_data__(query) + elif text.startswith(f"{VOICE_RESPONSE}:"): + query = text.replace(f"{VOICE_RESPONSE}:", "").strip() + yield await self.__gen_voice_data__(query, chat_id) else: queries = text.replace(f"{SEARCH_QUERIES}:\n-", "").split("\n-") query_responses_prompt = await self.__gen_live_data_prompt__(queries) - response_stream = self.process_response(session=session, messages=[query_responses_prompt]) + response_stream = self.process_response(session=session, messages=[query_responses_prompt], chat_id=chat_id) async for res in response_stream: yield res diff --git a/chat/repository.py b/chat/repository.py index 7aec86c..89070e4 100644 --- a/chat/repository.py +++ b/chat/repository.py @@ -6,18 +6,20 @@ from chat.query_processor import QueryProcessor class Chat(): + __id: int __session: ChatSession __processor: QueryProcessor __sem = asyncio.BoundedSemaphore(1) - def __init__(self, session: ChatSession, processor: QueryProcessor): + def __init__(self, id: int, session: ChatSession, processor: QueryProcessor): + self.__id = id self.__session = session self.__processor = processor async def send_message_async(self, messages: Union[Iterable[content_types.PartType], str]): async with self.__sem: # generate new response only if earlier responses are complete - async for reply in self.__processor.process_response(session=self.__session, messages=messages): + async for reply in self.__processor.process_response(session=self.__session, messages=messages, chat_id=self.__id): yield reply class ChatRepo(): @@ -26,15 +28,15 @@ class ChatRepo(): __chat_creation_sem = asyncio.BoundedSemaphore(1) __query_processor: QueryProcessor - def __init__(self, service: ChatService) -> None: + def __init__(self, service: ChatService, processor: QueryProcessor) -> None: self.__service = service self.__chats = {} - self.__query_processor = QueryProcessor(service=service) + self.__query_processor = processor async def get_chat_session(self, chat_id: int): async with self.__chat_creation_sem: if chat_id not in self.__chats.keys(): session = self.__service.create_chat_session() - self.__chats[chat_id] = Chat(session=session, processor=self.__query_processor) + self.__chats[chat_id] = Chat(id=chat_id, session=session, processor=self.__query_processor) return self.__chats[chat_id] \ No newline at end of file diff --git a/containers.py b/containers.py index c6d4880..9b9fc70 100644 --- a/containers.py +++ b/containers.py @@ -1,5 +1,6 @@ from dependency_injector import providers, containers from bot.bot import TgBot +from chat.query_processor import QueryProcessor from chat.repository import ChatRepo from chat.service import ChatService @@ -9,5 +10,6 @@ class Configs(containers.DeclarativeContainer): class BotContainer(containers.DeclarativeContainer): chat_service = providers.Singleton(ChatService, api_key=Configs.chat_config.api_key, bing_cookie=Configs.chat_config.bing_cookie) - chat_repo = providers.Factory(ChatRepo, service=chat_service) + query_processor = providers.Singleton(QueryProcessor, service=chat_service) + chat_repo = providers.Factory(ChatRepo, service=chat_service, processor=query_processor) tg_bot = providers.Singleton(TgBot, token=Configs.bot_config.token, chat_repo=chat_repo, webhook_host=Configs.bot_config.webhook_host, webhook_secret=Configs.bot_config.webhook_secret) diff --git a/prompts/keywords.py b/prompts/keywords.py index 680f09e..e2e0ddf 100644 --- a/prompts/keywords.py +++ b/prompts/keywords.py @@ -1,4 +1,5 @@ MESSAGE_METADATA = 'message_metadata' SEARCH_QUERIES = 'search_queries' SEARCH_RESPONSES = 'search_responses' -IMAGE_QUERY = 'image_query' \ No newline at end of file +IMAGE_QUERY = 'image_query' +VOICE_RESPONSE = 'voice_response' \ No newline at end of file diff --git a/prompts/static.py b/prompts/static.py index 8ae96a8..5ff8c51 100644 --- a/prompts/static.py +++ b/prompts/static.py @@ -1,6 +1,6 @@ from google.generativeai.generative_models import content_types -from prompts.keywords import IMAGE_QUERY, MESSAGE_METADATA, SEARCH_QUERIES, SEARCH_RESPONSES +from prompts.keywords import IMAGE_QUERY, MESSAGE_METADATA, SEARCH_QUERIES, SEARCH_RESPONSES, VOICE_RESPONSE CHAT_INIT_HISTORY = [ content_types.ContentDict(parts = [""" @@ -10,9 +10,10 @@ Here are a few set of rules that you should follow. """, f""" Rules: -- A response you generate can be either your answer or a set of search queries to gather missing informations or a image query to generate an image. Never mix the three in a single response. +- A response you generate can be either your answer in text/voice or a set of search queries to gather missing informations or a image query to generate an image. Never mix any of the four types in a single response. - Ask conversational questions and don't generate any queries until you understand the exact motive of the conversation. - Generate an image query only when the user asks for an image. The image query should be in the format, "{IMAGE_QUERY}: ". The should contain a detailed description of the image that the user asked for. +- Generate a voice response only when the user asks you to speak or send a voice or audio message. The voice response should be in the format, "{VOICE_RESPONSE}: ". The should contain the voice text of your response. - The search queries message format should be just like, "{SEARCH_QUERIES}:\n- \n- \n- ...\n- " - Generate 5 search queries when you're less than 50% confident, 4 search queries if greater than 50%, 3 if greater than 60%, 2 if greater than 70%, 1 if greater than 80%. Don't ask search query if you're more than 90% confident. - On response to search queries you'll receive search responses in the format, "{SEARCH_RESPONSES}:\n- query:\n- title:\n- body:\n- url:\n- ...\n- query:\n- title:\n- body:\n- url:\n" diff --git a/requirements.txt b/requirements.txt index be4e96a..67ee789 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ pypdfium2~=4.25.0 duckduckgo-search~=4.1.1 sentry_sdk~=1.39.1 re_edge_gpt~=0.0.20 -md2tgmd @ git+https://github.com/yym68686/md2tgmd.git \ No newline at end of file +md2tgmd @ git+https://github.com/yym68686/md2tgmd.git +pyttsx3~=2.90 \ No newline at end of file diff --git a/temp/.gitkeep b/temp/.gitkeep new file mode 100644 index 0000000..e69de29