pdf-voice-to-voice-chat.py
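# Streamlit app: voice-to-voice chat over your own PDFs.
# Flow: upload PDFs in the sidebar -> index them with llama_index + FAISS ->
# record a question with the microphone -> transcribe it -> query the index ->
# read the answer aloud with Azure Speech.
#
# Run with:  streamlit run pdf-voice-to-voice-chat.py
# Expects a .env file providing OPENAI_API_KEY, AZURE_SPEECH_KEY and AZURE_SPEECH_REGION.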
import os
import faiss
import streamlit as st
from dotenv import load_dotenv
from langchain_core.messages import AIMessage, HumanMessage
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage
from llama_index.vector_stores.faiss import FaissVectorStore
import azure.cognitiveservices.speech as speechsdk
import speech_recognition as sr
# FAISS index sized for 1536-dimensional embeddings (OpenAI's default embedding size)
d = 1536
faiss_index = faiss.IndexFlatL2(d)

PERSIST_DIR = "./storage"

# Load environment variables from .env before they are read at module level
load_dotenv()
AZURE_SPEECH_KEY = os.getenv("AZURE_SPEECH_KEY")
AZURE_SPEECH_REGION = os.getenv("AZURE_SPEECH_REGION")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

def saveUploadedFiles(pdf_docs):
    """Save the uploaded PDFs into a local folder so they can be indexed."""
    UPLOAD_DIR = "uploaded_files"
    try:
        os.makedirs(UPLOAD_DIR, exist_ok=True)
        for pdf in pdf_docs:
            file_path = os.path.join(UPLOAD_DIR, pdf.name)
            with open(file_path, "wb") as f:
                f.write(pdf.getbuffer())
        return "Done"
    except Exception:
        return "Error"

def doVectorization():
    """Index the uploaded PDFs into the FAISS-backed vector store and persist it."""
    try:
        vector_store = FaissVectorStore(faiss_index=faiss_index)
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        documents = SimpleDirectoryReader("./uploaded_files").load_data()
        index = VectorStoreIndex.from_documents(
            documents,
            storage_context=storage_context
        )
        index.storage_context.persist(persist_dir=PERSIST_DIR)
        return "Done"
    except Exception:
        return "Error"

def fetchData(user_question):
    """Load the persisted index and answer the question with its query engine."""
    try:
        vector_store = FaissVectorStore.from_persist_dir(PERSIST_DIR)
        storage_context = StorageContext.from_defaults(
            vector_store=vector_store, persist_dir=PERSIST_DIR
        )
        index = load_index_from_storage(storage_context=storage_context)
        query_engine = index.as_query_engine()
        response = query_engine.query(user_question)
        return str(response)
    except Exception:
        return "Error"

def transcribe_audio():
    """Record a question from the microphone and transcribe it with Google Speech Recognition."""
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()
    with microphone as source:
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source, timeout=20)
    st.write("🔄 Transcribing...")
    try:
        text = recognizer.recognize_google(audio)
        return text
    except sr.RequestError:
        return "API unavailable or unresponsive"
    except sr.UnknownValueError:
        return "Unable to recognize speech"

#============================================================================================================
WelcomeMessage = """
Hello, I am your PDF voice chatbot. Please upload your PDF documents and start asking me questions.
I will try my best to answer your questions from the documents.
"""

if "chat_history" not in st.session_state:
    st.session_state.chat_history = [
        AIMessage(content=WelcomeMessage)
    ]

# Expose the OpenAI key to llama_index
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

# Azure text-to-speech configuration for the bot's spoken replies
speech_config = speechsdk.SpeechConfig(subscription=AZURE_SPEECH_KEY, region=AZURE_SPEECH_REGION)
speech_config.speech_synthesis_voice_name = "en-US-AriaNeural"
speech_config.speech_synthesis_language = "en-US"
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)

def main():
    load_dotenv()
    st.set_page_config(
        page_title="Chat with multiple PDFs",
        page_icon=":sparkles:"
    )
    st.header("Chat with single or multiple PDFs :sparkles:")

    # Replay the conversation so far
    for message in st.session_state.chat_history:
        if isinstance(message, AIMessage):
            with st.chat_message("AI"):
                st.markdown(message.content)
        elif isinstance(message, HumanMessage):
            with st.chat_message("Human"):
                st.markdown(message.content)

    # Sidebar: upload and index the PDF documents
    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'",
            accept_multiple_files=True
        )
        if st.button("Process"):
            with st.spinner("Processing"):
                IsFilesSaved = saveUploadedFiles(pdf_docs)
                if IsFilesSaved == "Done":
                    IsVectorized = doVectorization()
                    if IsVectorized == "Done":
                        st.session_state.isPdfProcessed = "done"
                        st.success("Done!")
                    else:
                        st.error("Error in vectorization!")
                else:
                    st.error("Error in saving the files!")

    # Voice question: record, transcribe, query the index, and speak the answer
    if st.button("Start Asking Question"):
        st.write("🎤 Recording started... Ask your question")
        transcription = transcribe_audio()
        st.write("✅ Recording ended")
        st.session_state.chat_history.append(HumanMessage(content=transcription))
        with st.chat_message("Human"):
            st.markdown(transcription)
        with st.chat_message("AI"):
            with st.spinner("Fetching data ..."):
                response = fetchData(transcription)
                st.markdown(response)
                speech_synthesizer.speak_text_async(response).get()
        st.session_state.chat_history.append(AIMessage(content=response))

    # Speak the welcome message once, on the first page load
    if "WelcomeMessage" not in st.session_state:
        st.session_state.WelcomeMessage = WelcomeMessage
        speech_synthesizer.speak_text_async(WelcomeMessage).get()

#============================================================================================================
if __name__ == '__main__':
    main()