-
Notifications
You must be signed in to change notification settings - Fork 0
/
__main__.py
67 lines (52 loc) · 2.12 KB
/
__main__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
import os
def main(pdf_path="z.pdf", vector_dir="./vector", question="what is proxy pattern?"):
    """Answer a question about a PDF with a local RAG pipeline.

    On first run, loads *pdf_path*, splits it into chunks, embeds them with a
    local Ollama embedding model, and persists a FAISS index to *vector_dir*.
    On later runs the saved index is loaded instead of re-embedding. The
    retrieved context plus *question* are then sent to a local llama3 model.

    Args:
        pdf_path: PDF file to index (only read when the index does not exist).
        vector_dir: Directory holding the persisted FAISS index.
        question: Query to answer against the indexed document.
    """
    embeddings = OllamaEmbeddings(
        model="nomic-embed-text"
    )
    # Build the vector store only once; afterwards reuse the serialized
    # index on disk to avoid re-embedding the whole PDF.
    if not os.path.isdir(vector_dir):
        print("load pdf file...")
        loader = PyPDFLoader(pdf_path)
        pages = loader.load()
        # Chunk the pages; the overlap preserves context across chunk borders.
        print("split pdf into documents")
        splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=100)
        texts = splitter.split_documents(pages)
        print("create vectorstore...")
        docsearch = FAISS.from_documents(texts, embeddings)
        # Use vector_dir (was a second hard-coded "vector" literal) so the
        # save and load paths cannot drift apart.
        docsearch.save_local(vector_dir)
    else:
        # allow_dangerous_deserialization is required by FAISS.load_local for
        # its pickle-backed metadata; acceptable here because we wrote the
        # index ourselves on a previous run.
        docsearch = FAISS.load_local(vector_dir, embeddings=embeddings, allow_dangerous_deserialization=True)
    template = """
    Answer the question in your own words from the context given to you.
    If questions are asked where there is no relevant context available, please answer from
    what you know.
    Context: {context}
    Human: {question}
    Assistant:
    """
    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template
    )
    # temperature=0 for deterministic, context-grounded answers.
    llm = ChatOllama(
        model="llama3:8b",
        temperature=0
    )
    # RAG: embed the query, retrieve similar chunks from the vector store,
    # then pass context + question together to the model via the prompt.
    qa = RetrievalQA.from_chain_type(llm, retriever=docsearch.as_retriever(), chain_type_kwargs={'prompt': prompt})
    # .invoke() replaces the deprecated Chain.__call__ (qa({...})) style.
    result1 = qa.invoke({"query": question})
    print(result1)
# Script entry point: run the RAG demo only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()