-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathretrieval_chain.py
64 lines (53 loc) · 2.14 KB
/
retrieval_chain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from typing import Dict, Any, List
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document
from langchain.chains import RetrievalQA
from llm_client import NvidiaLLM
from config import Config
class RetrievalChain:
    """Sets up the Retrieval Augmented Generation (RAG) chain.

    Construction embeds the supplied documents with a HuggingFace embedding
    model, indexes them in a FAISS vector store, and wires a similarity
    retriever together with an ``NvidiaLLM`` into a ``RetrievalQA`` chain of
    type ``"stuff"`` that is configured to return its source documents.

    Attributes set by ``__init__``: ``embeddings``, ``vectorstore``,
    ``retriever``, ``prompt_template``, ``llm`` and ``chain``.
    """

    # Defaults equal the values that were previously hard-coded, so calling
    # ``RetrievalChain(documents)`` behaves as before.
    DEFAULT_EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
    DEFAULT_DEVICE = "cpu"
    DEFAULT_TOP_K = 5

    # Prompt text; ``{context}`` and ``{question}`` are filled in by the chain.
    _PROMPT_TEMPLATE = (
        "\n"
        "Use the following context to answer the question:\n"
        "Context:\n"
        "{context}\n"
        "Question:\n"
        "{question}\n"
        "Provide a detailed analysis based on the historical data:\n"
    )

    def __init__(
        self,
        documents: List["Document"],
        *,
        embedding_model: str = DEFAULT_EMBEDDING_MODEL,
        device: str = DEFAULT_DEVICE,
        top_k: int = DEFAULT_TOP_K,
    ):
        """Build the embeddings, vector store, retriever, LLM and QA chain.

        Args:
            documents: Documents to embed and index. Must not be empty.
            embedding_model: Model name passed to ``HuggingFaceEmbeddings``.
            device: Device string forwarded through ``model_kwargs``.
            top_k: Number of documents the retriever fetches per query
                (the ``k`` search argument). Must be at least 1.

        Raises:
            ValueError: If ``documents`` is empty or ``top_k`` is below 1.
        """
        # Fail fast with a clear message instead of letting index
        # construction fail on an empty corpus after the model has loaded.
        if not documents:
            raise ValueError(
                "RetrievalChain requires at least one document to index"
            )
        if top_k < 1:
            raise ValueError("top_k must be at least 1")

        # Initialize embeddings with the requested model
        self.embeddings = HuggingFaceEmbeddings(
            model_name=embedding_model,
            model_kwargs={'device': device},
        )

        # Create vector store and a similarity retriever over it
        self.vectorstore = FAISS.from_documents(documents, self.embeddings)
        self.retriever = self.vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": top_k},
        )

        # Enhanced prompt template
        self.prompt_template = PromptTemplate(
            input_variables=["context", "question"],
            template=self._PROMPT_TEMPLATE,
        )

        # Initialize LLM and chain; generation settings come from Config
        completion_params = Config.COMPLETION_PARAMS
        self.llm = NvidiaLLM(
            model_name=Config.MODEL_NAME,
            temperature=completion_params["temperature"],
            max_tokens=completion_params["max_tokens"],
        )
        self.chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.retriever,
            return_source_documents=True,
            chain_type_kwargs={"prompt": self.prompt_template},
        )

    def query(self, question: str) -> Dict[str, Any]:
        """Query the RAG chain with error handling.

        Args:
            question: The question text, sent to the chain under the
                ``"query"`` input key.

        Returns:
            Whatever ``self.chain.invoke`` returns. NOTE(review): with
            ``return_source_documents=True`` this is expected to contain the
            answer and the retrieved source documents; confirm the exact keys
            against the installed LangChain version.

        Raises:
            RuntimeError: If the chain raises any exception. The original
                exception is attached as ``__cause__``.
        """
        try:
            # The chain is invoked with the "query" key, not "question".
            return self.chain.invoke({"query": question})
        except Exception as e:
            # Chain explicitly so the underlying failure stays in tracebacks.
            raise RuntimeError(f"Error during RAG chain query: {e}") from e