forked from jensenchackosaji/Stockloom
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
116 lines (96 loc) · 3.98 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import streamlit as st
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
import os
import pickle
from pdfreader import read_pdf
# SECURITY: never hard-code API keys in source control -- the key that was
# previously embedded here is compromised and must be revoked immediately.
# Provide the key via the OPENAI_API_KEY environment variable (or Streamlit
# secrets, `st.secrets`) before launching the app; OpenAIEmbeddings reads it
# from the environment.

# Mapping of sidebar display names to the PDF filenames on disk.
pdf_mapping = {
    'Lupin': 'LUPIN.pdf',
    'INDUSTOWER': 'INDUSTOWER.pdf',
    'M&M': 'M&M_Ltd.pdf',
    'BAJAJFINSERV': 'BAJAJFINSERV.pdf',
    'SBIN': 'SBIN.pdf',
    # Add more mappings as needed
}
# Main Streamlit app
def main():
    """Streamlit entry point: let the user pick a stock PDF, read it, and
    build a FAISS vector store over its text for later querying.

    Side effects:
        * populates ``st.session_state.processed_data`` with the text chunks
          and the vector store,
        * caches the vector store on disk as ``Pickle/<display name>.pkl``
          (only if not already present).
    """
    st.title("Query your PDF")

    with st.sidebar:
        st.title('StockLoom')
        st.markdown('''
## About
Choose the desired Stock, then perform a query.
''')

    custom_names = list(pdf_mapping.keys())
    # Leading '' entry gives an explicit "nothing selected" default.
    selected_custom_name = st.sidebar.selectbox('Choose your PDF', ['', *custom_names])
    selected_actual_name = pdf_mapping.get(selected_custom_name)

    # Guard clause: nothing to do until the user picks a stock.
    if not selected_actual_name:
        return

    # NOTE(review): absolute machine-specific path; a raw string avoids any
    # accidental backslash-escape surprises. Consider making this
    # configurable (env var or st.secrets) instead of hard-coding it.
    pdf_folder = r"D:\MecHackStockLoom\Stockloom\statement_pdf"
    file_path = os.path.join(pdf_folder, selected_actual_name)

    try:
        text = read_pdf(file_path)
        st.info("The content of the PDF is hidden. Type your query in the chat window.")
    except FileNotFoundError:
        st.error(f"File not found: {file_path}")
        return
    except Exception as e:
        # Surface any other parse/read failure to the user and stop.
        st.error(f"Error occurred while reading the PDF: {e}")
        return

    # Split the raw text into ~1000-char chunks; the 150-char overlap keeps
    # context that straddles a chunk boundary retrievable.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=150,
        length_function=len,
    )
    documents = text_splitter.split_text(text=text)

    # Embed each chunk and index the embeddings for similarity search.
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_texts(documents, embedding=embeddings)

    st.session_state.processed_data = {
        "document_chunks": documents,
        "vectorstore": vectorstore,
    }

    # Cache the vector store on disk so it need not be rebuilt next run.
    # makedirs(exist_ok=True) avoids the exists()/mkdir() race of the
    # check-then-create pattern.
    pickle_folder = "Pickle"
    os.makedirs(pickle_folder, exist_ok=True)
    pickle_file_path = os.path.join(pickle_folder, f"{selected_custom_name}.pkl")
    if not os.path.exists(pickle_file_path):
        with open(pickle_file_path, "wb") as f:
            pickle.dump(vectorstore, f)
# Script entry point (typically launched via ``streamlit run app.py``).
if __name__ == "__main__":
    main()