gemi (speak): add voice capability to gemi
Akash98Sky committed Jan 13, 2024
1 parent dcd34a1 commit 3df6f61
Showing 9 changed files with 52 additions and 18 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -161,3 +161,6 @@ cython_debug/

# Testing purpose
test.*

# Temporary files.
temp/voice_*.*
8 changes: 6 additions & 2 deletions bot/routes.py
@@ -3,8 +3,7 @@
from typing import Any, Dict, Union
from aiogram import Router
from aiogram.filters import CommandStart
from aiogram.types import Message
from aiogram.enums import ParseMode
from aiogram.types import Message, InputMediaAudio
from aiogram.exceptions import TelegramBadRequest
from aiogram.utils.markdown import bold, italic, pre

@@ -63,6 +62,11 @@ async def echo_handler(message: Message, repo: ChatRepo, prompts: list[Union[str
await sent.delete()
sent = None
await message.reply_media_group(media=reply)
elif isinstance(reply, InputMediaAudio):
if sent:
await sent.delete()
sent = None
await message.reply_voice(voice=reply.media)
elif sent:
response = response + reply
error = None
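For illustration only (not code from this commit): the new InputMediaAudio branch simply unwraps the generated file and hands it to Message.reply_voice. A minimal aiogram v3 sketch of that pattern, with a hypothetical handler name and file path:

from aiogram.types import FSInputFile, InputMediaAudio, Message

async def send_voice_reply(message: Message, audio_path: str) -> None:
    # Wrap a locally generated audio file so it can flow through the same
    # reply-routing code as other media types.
    media = InputMediaAudio(media=FSInputFile(audio_path))
    # reply_voice expects the underlying input file, so unwrap .media before sending.
    await message.reply_voice(voice=media.media)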
32 changes: 26 additions & 6 deletions chat/query_processor.py
@@ -1,21 +1,31 @@
import asyncio
from logging import Logger, getLogger
import aiohttp
from aiogram.types import InputMediaPhoto, BufferedInputFile
import pyttsx3
from aiogram.types import InputMediaPhoto, InputMediaAudio, FSInputFile
from duckduckgo_search import AsyncDDGS
from google.generativeai.generative_models import ChatSession, content_types
import time

from chat.service import ChatService
from prompts.keywords import IMAGE_QUERY, SEARCH_QUERIES
from prompts.keywords import IMAGE_QUERY, SEARCH_QUERIES, VOICE_RESPONSE
from prompts.templates import build_searchengine_response_prompt

logging: Logger = getLogger(__name__)

class QueryProcessor():
__service: ChatService
__voice_engine: pyttsx3.Engine
__query_list__ = [
IMAGE_QUERY,
SEARCH_QUERIES,
VOICE_RESPONSE
]

def __init__(self, service: ChatService):
self.__service = service
self.__voice_engine = pyttsx3.init()
self.__voice_engine.setProperty('voice', self.__voice_engine.getProperty('voices')[1].id)
self.__voice_engine.setProperty('rate', 140)

async def __process_searchengine_query__(self, query: str):
async with AsyncDDGS() as ddgs:
@@ -48,15 +58,22 @@ async def __gen_image_data__(self, query: str):

return images

async def process_response(self, session: ChatSession, messages: list[content_types.PartType]):
async def __gen_voice_data__(self, query: str, chat_id: int):
logging.debug(f"Generate voice query: {query}")
file_path = f"temp/voice_{chat_id}_{int(time.time())}.mp3"
self.__voice_engine.save_to_file(query, file_path)
self.__voice_engine.runAndWait()
return InputMediaAudio(media=FSInputFile(file_path))

async def process_response(self, session: ChatSession, messages: list[content_types.PartType], chat_id: int):
text = ""
has_query = False
response_stream = self.__service.gen_response_stream(prompts=messages, chat=session)

async for res in response_stream:
text += res
if len(text) > 15:
if text.startswith(f"{SEARCH_QUERIES}:") or text.startswith(f"{IMAGE_QUERY}:"):
if len([query for query in self.__query_list__ if text.startswith(f"{query}:")]) > 0:
has_query = True
else:
yield text
@@ -69,9 +86,12 @@ async def process_response(self, session: ChatSession, messages: list[content_ty
if text.startswith(f"{IMAGE_QUERY}:"):
query = text.replace(f"{IMAGE_QUERY}:", "").strip()
yield await self.__gen_image_data__(query)
elif text.startswith(f"{VOICE_RESPONSE}:"):
query = text.replace(f"{VOICE_RESPONSE}:", "").strip()
yield await self.__gen_voice_data__(query, chat_id)
else:
queries = text.replace(f"{SEARCH_QUERIES}:\n-", "").split("\n-")
query_responses_prompt = await self.__gen_live_data_prompt__(queries)
response_stream = self.process_response(session=session, messages=[query_responses_prompt])
response_stream = self.process_response(session=session, messages=[query_responses_prompt], chat_id=chat_id)
async for res in response_stream:
yield res
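For illustration only (not code from this commit): the voice path above relies on pyttsx3's offline TTS engine. A self-contained sketch of the same call sequence, runnable outside the bot; the text and output path are made up, and selecting voices[1] assumes at least two system voices are installed:

import pyttsx3

engine = pyttsx3.init()
voices = engine.getProperty('voices')        # voices discovered by the local TTS driver
engine.setProperty('voice', voices[1].id)    # second installed voice, as in the commit
engine.setProperty('rate', 140)              # speaking rate in words per minute
engine.save_to_file("Hello from gemi!", "temp/voice_demo.mp3")  # queue synthesis to a file
engine.runAndWait()                          # block until the audio file is written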
12 changes: 7 additions & 5 deletions chat/repository.py
@@ -6,18 +6,20 @@
from chat.query_processor import QueryProcessor

class Chat():
__id: int
__session: ChatSession
__processor: QueryProcessor
__sem = asyncio.BoundedSemaphore(1)

def __init__(self, session: ChatSession, processor: QueryProcessor):
def __init__(self, id: int, session: ChatSession, processor: QueryProcessor):
self.__id = id
self.__session = session
self.__processor = processor

async def send_message_async(self, messages: Union[Iterable[content_types.PartType], str]):
async with self.__sem:
# generate new response only if earlier responses are complete
async for reply in self.__processor.process_response(session=self.__session, messages=messages):
async for reply in self.__processor.process_response(session=self.__session, messages=messages, chat_id=self.__id):
yield reply

class ChatRepo():
@@ -26,15 +28,15 @@ class ChatRepo():
__chat_creation_sem = asyncio.BoundedSemaphore(1)
__query_processor: QueryProcessor

def __init__(self, service: ChatService) -> None:
def __init__(self, service: ChatService, processor: QueryProcessor) -> None:
self.__service = service
self.__chats = {}
self.__query_processor = QueryProcessor(service=service)
self.__query_processor = processor

async def get_chat_session(self, chat_id: int):
async with self.__chat_creation_sem:
if chat_id not in self.__chats.keys():
session = self.__service.create_chat_session()
self.__chats[chat_id] = Chat(session=session, processor=self.__query_processor)
self.__chats[chat_id] = Chat(id=chat_id, session=session, processor=self.__query_processor)
return self.__chats[chat_id]

4 changes: 3 additions & 1 deletion containers.py
@@ -1,5 +1,6 @@
from dependency_injector import providers, containers
from bot.bot import TgBot
from chat.query_processor import QueryProcessor
from chat.repository import ChatRepo
from chat.service import ChatService

@@ -9,5 +10,6 @@ class Configs(containers.DeclarativeContainer):

class BotContainer(containers.DeclarativeContainer):
chat_service = providers.Singleton(ChatService, api_key=Configs.chat_config.api_key, bing_cookie=Configs.chat_config.bing_cookie)
chat_repo = providers.Factory(ChatRepo, service=chat_service)
query_processor = providers.Singleton(QueryProcessor, service=chat_service)
chat_repo = providers.Factory(ChatRepo, service=chat_service, processor=query_processor)
tg_bot = providers.Singleton(TgBot, token=Configs.bot_config.token, chat_repo=chat_repo, webhook_host=Configs.bot_config.webhook_host, webhook_secret=Configs.bot_config.webhook_secret)
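For illustration only (not code from this commit): the wiring change registers QueryProcessor as a shared Singleton while ChatRepo remains a Factory, so every repo instance reuses one pyttsx3 engine instead of creating its own. A hedged usage sketch, assuming the Configs values are populated elsewhere before any provider is called:

from containers import BotContainer

processor = BotContainer.query_processor()   # Singleton: same instance on every call
repo_a = BotContainer.chat_repo()            # Factory: a fresh ChatRepo per call...
repo_b = BotContainer.chat_repo()
assert BotContainer.query_processor() is processor  # ...all sharing one QueryProcessor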
3 changes: 2 additions & 1 deletion prompts/keywords.py
@@ -1,4 +1,5 @@
MESSAGE_METADATA = 'message_metadata'
SEARCH_QUERIES = 'search_queries'
SEARCH_RESPONSES = 'search_responses'
IMAGE_QUERY = 'image_query'
IMAGE_QUERY = 'image_query'
VOICE_RESPONSE = 'voice_response'
5 changes: 3 additions & 2 deletions prompts/static.py
@@ -1,6 +1,6 @@
from google.generativeai.generative_models import content_types

from prompts.keywords import IMAGE_QUERY, MESSAGE_METADATA, SEARCH_QUERIES, SEARCH_RESPONSES
from prompts.keywords import IMAGE_QUERY, MESSAGE_METADATA, SEARCH_QUERIES, SEARCH_RESPONSES, VOICE_RESPONSE

CHAT_INIT_HISTORY = [
content_types.ContentDict(parts = ["""
@@ -10,9 +10,10 @@
Here are a few set of rules that you should follow.
""", f"""
Rules:
- A response you generate can be either your answer or a set of search queries to gather missing informations or a image query to generate an image. Never mix the three in a single response.
- A response you generate can be either your answer in text/voice or a set of search queries to gather missing informations or a image query to generate an image. Never mix any of the four types in a single response.
- Ask conversational questions and don't generate any queries until you understand the exact motive of the conversation.
- Generate an image query only when the user asks for an image. The image query should be in the format, "{IMAGE_QUERY}: <image-query>". The <image-query> should contain a detailed description of the image that the user asked for.
- Generate a voice response only when the user asks you to speak or send a voice or audio message. The voice response should be in the format, "{VOICE_RESPONSE}: <voice-response>". The <voice-response> should contain the voice text of your response.
- The search queries message format should be just like, "{SEARCH_QUERIES}:\n- <query-1>\n- <query-2>\n- <query-3> ...\n- <query-n>"
- Generate 5 search queries when you're less than 50% confident, 4 search queries if greater than 50%, 3 if greater than 60%, 2 if greater than 70%, 1 if greater than 80%. Don't ask search query if you're more than 90% confident.
- On response to search queries you'll receive search responses in the format, "{SEARCH_RESPONSES}:\n- query:<query-1>\n- title:<title-1>\n- body:<body-1>\n- url:<url-1>\n- ...\n- query:<query-n>\n- title:<title-n>\n- body:<body-n>\n- url:<url-n>\n"
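For illustration only (not code from this commit): the rules above define a small text protocol in which every model reply is either plain text or starts with one keyword prefix. An illustrative classifier over made-up sample replies, showing how such prefixes can be told apart:

from prompts.keywords import IMAGE_QUERY, SEARCH_QUERIES, VOICE_RESPONSE

samples = [
    "voice_response: Sure, here is my answer read out loud.",
    "image_query: a watercolor painting of a red fox in the snow",
    "search_queries:\n- python 3.12 release date\n- python 3.12 new features",
    "Just a plain text answer with no prefix.",
]

for reply in samples:
    # The first matching keyword prefix wins; anything else is a normal text reply.
    kind = next((k for k in (IMAGE_QUERY, SEARCH_QUERIES, VOICE_RESPONSE)
                 if reply.startswith(f"{k}:")), "text")
    print(kind, "->", reply.splitlines()[0])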
3 changes: 2 additions & 1 deletion requirements.txt
@@ -7,4 +7,5 @@ pypdfium2~=4.25.0
duckduckgo-search~=4.1.1
sentry_sdk~=1.39.1
re_edge_gpt~=0.0.20
md2tgmd @ git+https://github.com/yym68686/md2tgmd.git
md2tgmd @ git+https://github.com/yym68686/md2tgmd.git
pyttsx3~=2.90
Empty file added temp/.gitkeep
Empty file.
