-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
133 lines (105 loc) · 5.32 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import streamlit as st
import os
from langchain_core.messages import AIMessage, HumanMessage
# To load web pages as documents
from langchain_community.document_loaders import WebBaseLoader
# To split documents into smaller chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Vector store for storing document embeddings
from langchain_community.vectorstores import Chroma
# For generating embeddings and interacting with OpenAI's LLM
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from dotenv import load_dotenv # To load environment variables
# For creating chat prompts
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
# To create retrieval chains
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
# To combine document chains
from langchain.chains.combine_documents import create_stuff_documents_chain
# Load environment variables from a .env file
load_dotenv()
API = os.getenv('API')
def get_vectorstore_from_url(url):
    """Build a Chroma vector store from the content of a web page.

    The page at ``url`` is fetched with ``WebBaseLoader``, split into chunks
    with a default-configured ``RecursiveCharacterTextSplitter``, and embedded
    with OpenAI's ``text-embedding-3-small`` model using the module-level
    ``API`` key.

    Args:
        url: Address of the web page to load.

    Returns:
        The ``Chroma`` vector store holding the embedded chunks.
    """
    # The request is sent with a Googlebot-style User-Agent header.
    request_headers = {"User-Agent":"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Safari/537.36"}
    pages = WebBaseLoader(url, header_template=request_headers).load()

    # Break the loaded page into smaller pieces before embedding.
    chunks = RecursiveCharacterTextSplitter().split_documents(pages)

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small", api_key=API)
    return Chroma.from_documents(chunks, embeddings)
def get_context_retriever_chain(vector_store):
    """Create a history-aware retriever chain on top of ``vector_store``.

    The chain is built by ``create_history_aware_retriever`` from a
    ``gpt-3.5-turbo`` chat model, the store's default retriever, and a prompt
    that asks the model to turn the conversation into a search query.

    Args:
        vector_store: Object exposing ``as_retriever()``.

    Returns:
        The chain returned by ``create_history_aware_retriever``.
    """
    # Prompt layout: prior messages, the latest user input, then the
    # instruction to produce a search query.
    search_query_prompt = ChatPromptTemplate.from_messages([
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation"),
    ])

    return create_history_aware_retriever(
        ChatOpenAI(model="gpt-3.5-turbo", api_key=API),
        vector_store.as_retriever(),
        search_query_prompt,
    )
def get_conversational_rag_chain(retriever_chain):
    """Create the retrieval chain that answers questions from retrieved context.

    A ``gpt-3.5-turbo`` chat model and an answer prompt are combined with
    ``create_stuff_documents_chain``; the result is then paired with
    ``retriever_chain`` through ``create_retrieval_chain``.

    Args:
        retriever_chain: Retriever passed as the first argument to
            ``create_retrieval_chain``.

    Returns:
        The chain returned by ``create_retrieval_chain``.
    """
    # Prompt layout: system instruction carrying the {context} slot, prior
    # messages, then the latest user input.
    answer_prompt = ChatPromptTemplate.from_messages([
        ("system",
         "Answer the user's questions based on the below context:\n\n{context}"),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
    ])
    chat_model = ChatOpenAI(model="gpt-3.5-turbo", api_key=API)

    answer_chain = create_stuff_documents_chain(chat_model, answer_prompt)
    rag_chain = create_retrieval_chain(retriever_chain, answer_chain)
    return rag_chain
def get_response(user_input):
    """Return the chatbot's answer to ``user_input``.

    Both chains are rebuilt on every call from the vector store kept in
    ``st.session_state.vector_store``; the conversation so far is read from
    ``st.session_state.chat_history``.

    Args:
        user_input: The text the user just submitted.

    Returns:
        The value stored under the ``"answer"`` key of the chain's result.
    """
    rag_chain = get_conversational_rag_chain(
        get_context_retriever_chain(st.session_state.vector_store)
    )

    payload = {
        "chat_history": st.session_state.chat_history,
        "input": user_input,
    }
    result = rag_chain.invoke(payload)
    return result['answer']
# Configure the Streamlit app
st.set_page_config(page_title="Chat with websites", page_icon="🤖")
st.title("Chat with websites")

# Sidebar for entering the website URL
with st.sidebar:
    st.header("Settings")
    website_url = st.text_input("Website URL")

# Display information if no URL is entered
if not website_url:
    st.info("Please enter a website URL")
else:
    # (Re)build the vector store whenever the URL differs from the one the
    # current store was built from. Previously the store was created once and
    # kept for the whole session, so entering a different URL silently kept
    # answering from the first website.
    if st.session_state.get("vector_store_url") != website_url:
        st.session_state.vector_store = get_vectorstore_from_url(website_url)
        # Recorded only after a successful build so a failed load is retried
        # on the next rerun.
        st.session_state.vector_store_url = website_url
        # Start a fresh conversation: earlier messages refer to another site.
        st.session_state.chat_history = [
            AIMessage(content="Hello, I am a bot. How can I help you?"),
        ]
        # NOTE(review): Chroma.from_documents is called without an explicit
        # collection name; confirm that documents from a previously loaded
        # URL are not still present in the new store.

    # Handle user input
    user_query = st.chat_input("Type your message here...")
    if user_query:
        # Get the response for the user query
        response = get_response(user_query)
        # Update the chat history with the user query and the bot response
        st.session_state.chat_history.append(HumanMessage(content=user_query))
        st.session_state.chat_history.append(AIMessage(content=response))

    # Display the chat history
    for message in st.session_state.chat_history:
        if isinstance(message, AIMessage):
            with st.chat_message("AI"):
                st.write(message.content)
        elif isinstance(message, HumanMessage):
            with st.chat_message("Human"):
                st.write(message.content)