From 2f7e22f3912b939c1a4b7965786443297eb220ee Mon Sep 17 00:00:00 2001 From: Brent Laster Date: Mon, 5 Aug 2024 16:27:07 -0400 Subject: [PATCH 01/14] Create genai_agent.py --- misc/genai_agent.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 misc/genai_agent.py diff --git a/misc/genai_agent.py b/misc/genai_agent.py new file mode 100644 index 0000000..bbc2c5c --- /dev/null +++ b/misc/genai_agent.py @@ -0,0 +1,30 @@ +import crewai +from langchain_community.llms import Ollama +from langchain_community.tools import DuckDuckGoSearchRun +from crewai_tools import tool + +llm = Ollama(model="mistral") + +def callback_function(output): + print(f"Task completed: {output.raw_output}") + +@tool("DuckDuckGoSearch") +def search(search_query: str) -> str: + """Search the web for information on a given topic""" + return DuckDuckGoSearchRun().run(search_query) + +agent = crewai.Agent( + role="Calendar", + goal="What day of the month is Thanksgiving on in 2024?", + backstory="You are a calendar assistant. You provide information about dates. ", + tools=[search], + llm=llm, + allow_delegation=False, verbose=True) + +task = crewai.Task(description="What day of the month is Thanksgiving on in 2024?", + agent=agent, + expected_output="Date of Thanksgiving in the current year") + +crew = crewai.Crew(agents=[agent], tasks=[task], verbose=True) +res = crew.kickoff() +print(res) From 22a13955fef63d1ed2354271013b7439aed6decb Mon Sep 17 00:00:00 2001 From: Brent Laster Date: Mon, 5 Aug 2024 16:27:47 -0400 Subject: [PATCH 02/14] Create genai_agent2.py --- misc/genai_agent2.py | 49 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 misc/genai_agent2.py diff --git a/misc/genai_agent2.py b/misc/genai_agent2.py new file mode 100644 index 0000000..6d212ff --- /dev/null +++ b/misc/genai_agent2.py @@ -0,0 +1,49 @@ +from crewai import Crew, Agent, Task +from langchain_community.llms import Ollama +from langchain_community.vectorstores import Chroma +from langchain_community.embeddings import SentenceTransformerEmbeddings # Use the wrapper +from langchain_community.document_loaders import UnstructuredURLLoader +from langchain.text_splitter import RecursiveCharacterTextSplitter +from crewai_tools import BaseTool +from crewai_tools import tool +from crewai_tools import WebsiteSearchTool +from langchain_community.embeddings.fastembed import FastEmbedEmbeddings + +searchtool = WebsiteSearchTool( + website="https://www.almanac.com/thanksgiving-day", + config=dict( + llm=dict( + provider="ollama", # or google, openai, anthropic, llama2, ... + config=dict( + model="phi3", + ), + ), + embedder=dict( + provider="ollama", + config=dict( + model="mxbai-embed-large:latest", + + ), + ), + ) +) + + + +llm = Ollama(model="phi3") + +agent = Agent( + role="Calendar", + goal="What day of the month is Thanksgiving on in 2024?", + backstory="You are a calendar assistant. You provide information about dates. 
", + tools=[searchtool], + llm=llm, + allow_delegation=False, verbose=True) + +task = Task(description="What day of the month is Thanksgiving on in 2024?", + agent=agent, + expected_output="Date of Thanksgiving in 2024") + +crew = Crew(agents=[agent], tasks=[task], verbose=True) +res = crew.kickoff() +print(res) From 32a07c274a9d4cffc2fbfa8dbfa4bab416d037c7 Mon Sep 17 00:00:00 2001 From: Brent Laster Date: Mon, 5 Aug 2024 16:28:17 -0400 Subject: [PATCH 03/14] Create genai_crew.py --- misc/genai_crew.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 misc/genai_crew.py diff --git a/misc/genai_crew.py b/misc/genai_crew.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/misc/genai_crew.py @@ -0,0 +1 @@ + From db1e22c570b9b4444fa73e2ed6b46631814a2b9f Mon Sep 17 00:00:00 2001 From: Brent Laster Date: Mon, 5 Aug 2024 16:28:46 -0400 Subject: [PATCH 04/14] Create gen_rag.py --- misc/gen_rag.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 misc/gen_rag.py diff --git a/misc/gen_rag.py b/misc/gen_rag.py new file mode 100644 index 0000000..2566484 --- /dev/null +++ b/misc/gen_rag.py @@ -0,0 +1,60 @@ +import sys +from langchain_community.document_loaders import PyPDFLoader +from langchain_community.vectorstores import Chroma +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.embeddings.fastembed import FastEmbedEmbeddings +from langchain.prompts import ChatPromptTemplate +from langchain_community.llms import Ollama + +# set some config variables for ChromaDB +CHROMA_DATA_PATH = "vdb_data/" +DOC_PATH = sys.argv[1] + +llm = Ollama(model="mistral") + +# load your pdf doc +loader = PyPDFLoader(DOC_PATH) +pages = loader.load() + +# split the doc into smaller chunks i.e. chunk_size=500 +text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50) +chunks = text_splitter.split_documents(pages) + + +embeddings = FastEmbedEmbeddings() + +# embed the chunks as vectors and load them into the database +db_chroma = Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_DATA_PATH) + +PROMPT_TEMPLATE = """ +Answer the question based only on the following context: +{context} +Answer the question based on the above context: {question}. +Provide a detailed answer. +Don’t justify your answers. +Don’t give information not mentioned in the CONTEXT INFORMATION. +Do not say "according to the context" or "mentioned in the context" or similar. 
+""" + + +while True: + query = input("\nQuery: ") + if query == "exit": + break + if query.strip() == "": + continue + # retrieve context - top 5 most relevant (closests) chunks to the query vector + # (by default Langchain is using cosine distance metric) + docs_chroma = db_chroma.similarity_search_with_score(query, k=5) + + # generate an answer based on given user query and retrieved context information + context_text = "\n\n".join([doc.page_content for doc, _score in docs_chroma]) + # you can use a prompt template + + # load retrieved context and user query in the prompt template + prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE) + prompt = prompt_template.format(context=context_text, question=query) + + # call LLM model to generate the answer based on the given context and query + response_text = llm.invoke(prompt) + print(response_text) From 3f19a4f063708be4c41bfbe5e1d23c3d0ef6689f Mon Sep 17 00:00:00 2001 From: Brent Laster Date: Mon, 5 Aug 2024 16:29:16 -0400 Subject: [PATCH 05/14] Create genai_rag2.py --- misc/genai_rag2.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 misc/genai_rag2.py diff --git a/misc/genai_rag2.py b/misc/genai_rag2.py new file mode 100644 index 0000000..3e4b0db --- /dev/null +++ b/misc/genai_rag2.py @@ -0,0 +1,41 @@ +import os +import wget +from langchain.vectorstores import Qdrant +from langchain_community.document_loaders import BSHTMLLoader +from langchain.chains import RetrievalQA +from langchain_community.embeddings.fastembed import FastEmbedEmbeddings +from langchain_community.llms import Ollama + +#download War and Peace by Tolstoy +# wget.download("http://az.lib.ru/t/tolstoj_lew_nikolaewich/text_0073.shtml") +wget.download("https://www.cs.cmu.edu/~rgs/alice-I.html") + +#load text from html +loader = BSHTMLLoader("alice-I.html", open_encoding='ISO-8859-1') +war_and_peace = loader.load() + +#init Vector DB + +embeddings = FastEmbedEmbeddings() + +doc_store = Qdrant.from_documents( + war_and_peace, + embeddings, + location=":memory:", + collection_name="docs", +) + +llm = Ollama(model="mistral") +# ask questions + +while True: + question = input('Your question: ') + qa = RetrievalQA.from_chain_type( + llm=llm, + chain_type="stuff", + retriever=doc_store.as_retriever(), + return_source_documents=False, + ) + + result = qa(question) + print(f"Answer: {result}") From 7aa7e5f23bc92036e432363e125d0d5a2535a942 Mon Sep 17 00:00:00 2001 From: Brent Laster Date: Mon, 5 Aug 2024 16:29:46 -0400 Subject: [PATCH 06/14] Create genai_rag3.py --- misc/genai_rag3.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 misc/genai_rag3.py diff --git a/misc/genai_rag3.py b/misc/genai_rag3.py new file mode 100644 index 0000000..6575f2b --- /dev/null +++ b/misc/genai_rag3.py @@ -0,0 +1,45 @@ +import os +from langchain_community.llms import Ollama +from dotenv import load_dotenv +from langchain_community.embeddings import OllamaEmbeddings +from langchain_community.document_loaders import TextLoader +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.vectorstores import Chroma +from langchain.chains import create_retrieval_chain +from langchain import hub +from langchain.chains.combine_documents import create_stuff_documents_chain + +llm = Ollama(model="mistral", base_url="http://127.0.0.1:11434") + + +embed_model = OllamaEmbeddings( + model="mistral", + base_url='http://127.0.0.1:11434' +) + + +text = """ + In the 
lush canopy of a tropical rainforest, two mischievous monkeys, Coco and Mango, swung from branch to branch, their playful antics echoing through the trees. They were inseparable companions, sharing everything from juicy fruits to secret hideouts high above the forest floor. One day, while exploring a new part of the forest, Coco stumbled upon a beautiful orchid hidden among the foliage. Entranced by its delicate petals, Coco plucked it and presented it to Mango with a wide grin. Overwhelmed by Coco's gesture of friendship, Mango hugged Coco tightly, cherishing the bond they shared. From that day on, Coco and Mango ventured through the forest together, their friendship growing stronger with each passing adventure. As they watched the sun dip below the horizon, casting a golden glow over the treetops, they knew that no matter what challenges lay ahead, they would always have each other, and their hearts brimmed with joy. + """ + +text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=128) +chunks = text_splitter.split_text(text) + +vector_store = Chroma.from_texts(chunks, embed_model) + + +retriever = vector_store.as_retriever() + +chain = create_retrieval_chain(combine_docs_chain=llm,retriever=retriever) + +retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat") + +combine_docs_chain = create_stuff_documents_chain( + llm, retrieval_qa_chat_prompt +) + +retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain) + +response = retrieval_chain.invoke({"input": "Tell me name of monkeys and where do they live"}) +print(response['answer']) + From 1fe38f4633efed99ed86c57083082e828f574242 Mon Sep 17 00:00:00 2001 From: Brent Laster Date: Mon, 5 Aug 2024 16:30:12 -0400 Subject: [PATCH 07/14] Create genai_rag4.py --- misc/genai_rag4.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 misc/genai_rag4.py diff --git a/misc/genai_rag4.py b/misc/genai_rag4.py new file mode 100644 index 0000000..56a1c20 --- /dev/null +++ b/misc/genai_rag4.py @@ -0,0 +1,46 @@ +import os +from langchain_community.llms import Ollama +from dotenv import load_dotenv +from langchain_community.embeddings import OllamaEmbeddings +from langchain_community.document_loaders import TextLoader +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.vectorstores import Chroma +from langchain.chains import create_retrieval_chain +from langchain import hub +from langchain.chains.combine_documents import create_stuff_documents_chain + +llm = Ollama(model="mistral", base_url="http://127.0.0.1:11434") + + +embed_model = OllamaEmbeddings( + model="mistral", + base_url='http://127.0.0.1:11434' +) + + +text = """ + In the lush canopy of a tropical rainforest, two mischievous monkeys, Coco and Mango, swung from branch to branch, their playful antics echoing through the trees. They were inseparable companions, sharing everything from juicy fruits to secret hideouts high above the forest floor. One day, while exploring a new part of the forest, Coco stumbled upon a beautiful orchid hidden among the foliage. Entranced by its delicate petals, Coco plucked it and presented it to Mango with a wide grin. Overwhelmed by Coco's gesture of friendship, Mango hugged Coco tightly, cherishing the bond they shared. From that day on, Coco and Mango ventured through the forest together, their friendship growing stronger with each passing adventure. 
As they watched the sun dip below the horizon, casting a golden glow over the treetops, they knew that no matter what challenges lay ahead, they would always have each other, and their hearts brimmed with joy.
+    """
+
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=128)
+chunks = text_splitter.split_text(text)
+
+vector_store = Chroma.from_texts(chunks, embed_model)
+
+
+retriever = vector_store.as_retriever()
+
+chain = create_retrieval_chain(combine_docs_chain=llm,retriever=retriever)
+
+retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
+
+combine_docs_chain = create_stuff_documents_chain(
+    llm, retrieval_qa_chat_prompt
+)
+
+retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)
+
+response = retrieval_chain.invoke({"input": "Tell me name of monkeys and where do they live"})
+print(response['answer'])
+
+

From 2f6ccfafc164a2fa09aa50b9316a26ae81fc8b92 Mon Sep 17 00:00:00 2001
From: Brent Laster
Date: Mon, 5 Aug 2024 16:30:41 -0400
Subject: [PATCH 08/14] Create genai_rag5.py

---
 misc/genai_rag5.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 misc/genai_rag5.py

diff --git a/misc/genai_rag5.py b/misc/genai_rag5.py
new file mode 100644
index 0000000..7b48cba
--- /dev/null
+++ b/misc/genai_rag5.py
@@ -0,0 +1,62 @@
+import sys
+import PyPDF2
+from langchain_community.llms import Ollama
+from langchain_community.embeddings import OllamaEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import Chroma
+from langchain.chains import create_retrieval_chain
+from langchain import hub
+from langchain.chains.combine_documents import create_stuff_documents_chain
+
+llm = Ollama(model="mistral", base_url="http://127.0.0.1:11434")
+
+
+pdf_path = sys.argv[1]
+
+# Function to extract text from a PDF file on disk
+def extract_text_from_pdf(pdf_path):
+    text = ""
+    with open(pdf_path, "rb") as file:
+        reader = PyPDF2.PdfReader(file)
+        for page_num in range(len(reader.pages)):
+            text += reader.pages[page_num].extract_text()
+    return text
+
+
+
+embed_model = OllamaEmbeddings(
+    model="mistral",
+    base_url='http://127.0.0.1:11434'
+)
+
+
+text = extract_text_from_pdf(pdf_path)
+
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=128)
+chunks = text_splitter.split_text(text)
+
+vector_store = Chroma.from_texts(chunks, embed_model)
+
+
+retriever = vector_store.as_retriever()
+
+retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")
+
+combine_docs_chain = create_stuff_documents_chain(
+    llm, retrieval_qa_chat_prompt
+)
+
+retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)
+
+# The retrieval chain fills {context} from the retriever; the question is passed as "input"
+response = retrieval_chain.invoke({"input": "Tell me name of monkeys and where do they live?"})
+print(response['answer'])

From 5c53609410f165ba2112b05996f22649cb3fee62 Mon Sep 17 00:00:00 2001
From: Brent Laster
Date: Mon, 5 Aug 2024 16:31:16 -0400
Subject: [PATCH 09/14] Create genai_rag6.py

---
 misc/genai_rag6.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 misc/genai_rag6.py

diff --git
a/misc/genai_rag6.py b/misc/genai_rag6.py new file mode 100644 index 0000000..f9d8471 --- /dev/null +++ b/misc/genai_rag6.py @@ -0,0 +1,44 @@ +import fitz # PyMuPDF +from transformers import RagTokenizer, RagTokenForGeneration + +def extract_text_from_pdf(pdf_path): + """Extracts text from a given PDF file.""" + doc = fitz.open(pdf_path) + text = "" + for page in doc: + text += page.get_text() + doc.close() + return text + +def setup_rag_model(): + """Sets up the RAG tokenizer and model.""" + tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq") + model = RagTokenForGeneration.from_pretrained("facebook/rag-token-nq") + return tokenizer, model + +def answer_question(question, context, tokenizer, model): + """Generates an answer to the question based on the context using RAG.""" + inputs = tokenizer(question, context, return_tensors="pt", truncation=True, padding=True) + with tokenizer.as_target_tokenizer(): + output_ids = model.generate(**inputs) + return tokenizer.decode(output_ids[0], skip_special_tokens=True) + +# Example usage +pdf_path = 'sample.pdf' # Path to your PDF file +context = extract_text_from_pdf(pdf_path) +tokenizer, model = setup_rag_model() +question = "What is the main topic of the document?" +answer = answer_question(question, context, tokenizer, model) +print("Answer:", answer) + + +### Running the Example +#1. Replace `'sample.pdf'` with the path to your PDF file. +#2. Make sure to have a valid question that relates to the content of the PDF. +#3. Execute the script. + +### How It Works +#- **PDF Text Extraction**: The `extract_text_from_pdf` function reads the PDF and extracts all text from it. This text serves as the context for generating answers. +#- **Model Setup**: The `setup_rag_model` function loads the pre-trained RAG tokenizer and model. +#- **Answer Generation**: The `answer_question` function uses the model and tokenizer to generate an answer to the input question based on the extracted PDF text. 
+ From 38abb535600e08923a050ab36eaf5c3a6b647585 Mon Sep 17 00:00:00 2001 From: Brent Laster Date: Mon, 5 Aug 2024 16:31:54 -0400 Subject: [PATCH 10/14] Create localrag.py --- misc/localrag.py | 154 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 misc/localrag.py diff --git a/misc/localrag.py b/misc/localrag.py new file mode 100644 index 0000000..7eacce0 --- /dev/null +++ b/misc/localrag.py @@ -0,0 +1,154 @@ +import torch +import ollama +import os +from openai import OpenAI +import argparse +import json + +# ANSI escape codes for colors +PINK = '\033[95m' +CYAN = '\033[96m' +YELLOW = '\033[93m' +NEON_GREEN = '\033[92m' +RESET_COLOR = '\033[0m' + +# Function to open a file and return its contents as a string +def open_file(filepath): + with open(filepath, 'r', encoding='utf-8') as infile: + return infile.read() + +# Function to get relevant context from the vault based on user input +def get_relevant_context(rewritten_input, vault_embeddings, vault_content, top_k=3): + if vault_embeddings.nelement() == 0: # Check if the tensor has any elements + return [] + # Encode the rewritten input + input_embedding = ollama.embeddings(model='mxbai-embed-large', prompt=rewritten_input)["embedding"] + # Compute cosine similarity between the input and vault embeddings + cos_scores = torch.cosine_similarity(torch.tensor(input_embedding).unsqueeze(0), vault_embeddings) + # Adjust top_k if it's greater than the number of available scores + top_k = min(top_k, len(cos_scores)) + # Sort the scores and get the top-k indices + top_indices = torch.topk(cos_scores, k=top_k)[1].tolist() + # Get the corresponding context from the vault + relevant_context = [vault_content[idx].strip() for idx in top_indices] + return relevant_context + +def rewrite_query(user_input_json, conversation_history, ollama_model): + user_input = json.loads(user_input_json)["Query"] + context = "\n".join([f"{msg['role']}: {msg['content']}" for msg in conversation_history[-2:]]) + prompt = f"""Rewrite the following query by incorporating relevant context from the conversation history. + The rewritten query should: + + - Preserve the core intent and meaning of the original query + - Expand and clarify the query to make it more specific and informative for retrieving relevant context + - Avoid introducing new topics or queries that deviate from the original query + - DONT EVER ANSWER the Original query, but instead focus on rephrasing and expanding it into a new query + + Return ONLY the rewritten query text, without any additional formatting or explanations. 
+ + Conversation History: + {context} + + Original query: [{user_input}] + + Rewritten query: + """ + response = client.chat.completions.create( + model=ollama_model, + messages=[{"role": "system", "content": prompt}], + max_tokens=200, + n=1, + temperature=0.1, + ) + rewritten_query = response.choices[0].message.content.strip() + return json.dumps({"Rewritten Query": rewritten_query}) + +def ollama_chat(user_input, system_message, vault_embeddings, vault_content, ollama_model, conversation_history): + conversation_history.append({"role": "user", "content": user_input}) + + if len(conversation_history) > 1: + query_json = { + "Query": user_input, + "Rewritten Query": "" + } + rewritten_query_json = rewrite_query(json.dumps(query_json), conversation_history, ollama_model) + rewritten_query_data = json.loads(rewritten_query_json) + rewritten_query = rewritten_query_data["Rewritten Query"] + print(PINK + "Original Query: " + user_input + RESET_COLOR) + print(PINK + "Rewritten Query: " + rewritten_query + RESET_COLOR) + else: + rewritten_query = user_input + + relevant_context = get_relevant_context(rewritten_query, vault_embeddings, vault_content) + if relevant_context: + context_str = "\n".join(relevant_context) + print("Context Pulled from Documents: \n\n" + CYAN + context_str + RESET_COLOR) + else: + print(CYAN + "No relevant context found." + RESET_COLOR) + + user_input_with_context = user_input + if relevant_context: + user_input_with_context = user_input + "\n\nRelevant Context:\n" + context_str + + conversation_history[-1]["content"] = user_input_with_context + + messages = [ + {"role": "system", "content": system_message}, + *conversation_history + ] + + response = client.chat.completions.create( + model=ollama_model, + messages=messages, + max_tokens=2000, + ) + + conversation_history.append({"role": "assistant", "content": response.choices[0].message.content}) + + return response.choices[0].message.content + +# Parse command-line arguments +print(NEON_GREEN + "Parsing command-line arguments..." + RESET_COLOR) +parser = argparse.ArgumentParser(description="Ollama Chat") +parser.add_argument("--model", default="mistral", help="Ollama model to use (default: mistral)") +args = parser.parse_args() + +# Configuration for the Ollama API client +print(NEON_GREEN + "Initializing Ollama API client..." + RESET_COLOR) +client = OpenAI( + base_url='http://localhost:11434/v1', + api_key='mistral' +) + +# Load the vault content +print(NEON_GREEN + "Loading vault content..." + RESET_COLOR) +vault_content = [] +if os.path.exists("vault.txt"): + with open("vault.txt", "r", encoding='utf-8') as vault_file: + vault_content = vault_file.readlines() + +# Generate embeddings for the vault content using Ollama +print(NEON_GREEN + "Generating embeddings for the vault content..." + RESET_COLOR) +vault_embeddings = [] +for content in vault_content: + response = ollama.embeddings(model='mxbai-embed-large', prompt=content) + vault_embeddings.append(response["embedding"]) + +# Convert to tensor and print embeddings +print("Converting embeddings to tensor...") +vault_embeddings_tensor = torch.tensor(vault_embeddings) +print("Embeddings for each line in the vault:") +print(vault_embeddings_tensor) + +# Conversation loop +print("Starting conversation loop...") +conversation_history = [] +system_message = "You are a helpful assistant that is an expert at extracting the most useful information from a given text. Also bring in extra relevant infromation to the user query from outside the given context." 
+
+while True:
+    user_input = input(YELLOW + "Ask a query about your documents (or type 'quit' to exit): " + RESET_COLOR)
+    if user_input.lower() == 'quit':
+        break
+
+    response = ollama_chat(user_input, system_message, vault_embeddings_tensor, vault_content, args.model, conversation_history)
+    print(NEON_GREEN + "Response: \n\n" + response + RESET_COLOR)

From 6847d0118600620386404629dbec2b334bc9823b Mon Sep 17 00:00:00 2001
From: Brent Laster
Date: Mon, 5 Aug 2024 16:32:21 -0400
Subject: [PATCH 11/14] Create localrag2.py

---
 misc/localrag2.py | 126 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 misc/localrag2.py

diff --git a/misc/localrag2.py b/misc/localrag2.py
new file mode 100644
index 0000000..ffef78a
--- /dev/null
+++ b/misc/localrag2.py
@@ -0,0 +1,126 @@
+import torch
+import ollama
+import os
+from openai import OpenAI
+import argparse
+import json
+
+# ANSI escape codes for colors
+PINK = '\033[95m'
+CYAN = '\033[96m'
+YELLOW = '\033[93m'
+NEON_GREEN = '\033[92m'
+RESET_COLOR = '\033[0m'
+
+# Function to open a file and return its contents as a string
+def open_file(filepath):
+    with open(filepath, 'r', encoding='utf-8') as infile:
+        return infile.read()
+
+# Function to get related context from the data for user input
+def get_related_context(rewritten_input, data_embeddings, data_content, top_k=3):
+    if data_embeddings.nelement() == 0:  # Check if the tensor has any elements
+        return []
+    # Encode the rewritten input
+    input_embedding = ollama.embeddings(model='mxbai-embed-large', prompt=rewritten_input)["embedding"]
+    # Compute cosine similarity between the input and data embeddings
+    cos_scores = torch.cosine_similarity(torch.tensor(input_embedding).unsqueeze(0), data_embeddings)
+    # Adjust top_k if it's greater than the number of available scores
+    top_k = min(top_k, len(cos_scores))
+    # Sort the scores and get the top-k indices
+    top_indices = torch.topk(cos_scores, k=top_k)[1].tolist()
+    # Get the corresponding context from the data
+    relevant_context = [data_content[idx].strip() for idx in top_indices]
+    return relevant_context
+
+
+
+def ollama_chat(user_input, system_message, vault_embeddings, vault_content, ollama_model, conversation_history):
+    conversation_history.append({"role": "user", "content": user_input})
+
+    # Unlike localrag.py, this variant does no query rewriting, so the raw input is used for retrieval
+    rewritten_query = user_input
+
+    relevant_context = get_related_context(rewritten_query, vault_embeddings, vault_content)
+    if relevant_context:
+        context_str = "\n".join(relevant_context)
+        print("Context Pulled from Documents: \n\n" + CYAN + context_str + RESET_COLOR)
+    else:
+        print(CYAN + "No relevant context found." + RESET_COLOR)
+
+    user_input_with_context = user_input
+    if relevant_context:
+        user_input_with_context = user_input + "\n\nRelevant Context:\n" + context_str
+
+    conversation_history[-1]["content"] = user_input_with_context
+
+    messages = [
+        {"role": "system", "content": system_message},
+        *conversation_history
+    ]
+
+    response = client.chat.completions.create(
+        model=ollama_model,
+        messages=messages,
+        max_tokens=2000,
+    )
+
+    conversation_history.append({"role": "assistant", "content": response.choices[0].message.content})
+
+    return response.choices[0].message.content
+
+# Parse command-line arguments
+print(NEON_GREEN + "Parsing command-line arguments..." + RESET_COLOR)
+parser = argparse.ArgumentParser(description="Ollama Chat")
+parser.add_argument("--model", default="mistral", help="Ollama model to use (default: mistral)")
+args = parser.parse_args()
+
+# Configuration for the Ollama API client
+print(NEON_GREEN + "Initializing Ollama API client..." + RESET_COLOR)
+client = OpenAI(
+    base_url='http://localhost:11434/v1',
+    api_key='mistral'
+)
+
+# Load the vault content
+print(NEON_GREEN + "Loading vault content..." + RESET_COLOR)
+vault_content = []
+if os.path.exists("vault.txt"):
+    with open("vault.txt", "r", encoding='utf-8') as vault_file:
+        vault_content = vault_file.readlines()
+
+# Generate embeddings for the vault content using Ollama
+print(NEON_GREEN + "Generating embeddings for the vault content..." + RESET_COLOR)
+vault_embeddings = []
+for content in vault_content:
+    response = ollama.embeddings(model='mxbai-embed-large', prompt=content)
+    vault_embeddings.append(response["embedding"])
+
+# Convert to tensor and print embeddings
+print("Converting embeddings to tensor...")
+vault_embeddings_tensor = torch.tensor(vault_embeddings)
+print("Embeddings for each line in the vault:")
+print(vault_embeddings_tensor)
+
+# Conversation loop
+print("Starting conversation loop...")
+conversation_history = []
+system_message = "You are a helpful assistant that is an expert at extracting the most useful information from a given text. Also bring in extra relevant information to the user query from outside the given context."
+ +while True: + user_input = input(YELLOW + "Ask a query about your documents (or type 'quit' to exit): " + RESET_COLOR) + if user_input.lower() == 'quit': + break + + response = ollama_chat(user_input, system_message, vault_embeddings_tensor, vault_content, args.model, conversation_history) + print(NEON_GREEN + "Response: \n\n" + response + RESET_COLOR) From c549513ae16d246474f578647721e434f8466179 Mon Sep 17 00:00:00 2001 From: Brent Laster Date: Mon, 5 Aug 2024 16:33:08 -0400 Subject: [PATCH 12/14] Create rag_ui.py --- misc/rag_ui.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 misc/rag_ui.py diff --git a/misc/rag_ui.py b/misc/rag_ui.py new file mode 100644 index 0000000..5e50f7c --- /dev/null +++ b/misc/rag_ui.py @@ -0,0 +1,62 @@ +import streamlit as st +import PyPDF2 +from transformers import pipeline +from chromadb import chromadb +from tempfile import NamedTemporaryFile +from langchain_community.llms import Ollama + +# Initialize the local language model (LLM) for text generation +llm = Ollama(model="mistral", base_url="http://127.0.0.1:11434") + +# Initialize ChromaDB for chunking and embedding +chromadb = chromadb.Client() + +# Function to extract text from a PDF file +def extract_text_from_pdf(pdf_path): + text = "" + with NamedTemporaryFile(dir='.', suffix='.pdf') as f: + f.write(pdf_path.getbuffer()) + with open(f.name, "rb") as file: + reader = PyPDF2.PdfReader(f.name) + for page_num in range(len(reader.pages)): + text += reader.pages[page_num].extract_text() + f.close() + return text + +# Function to perform Retrieval-Augmented Generation (RAG) with PDFs +def rag_with_pdf(prompt, pdf_path, llm, chromadb, top_k=5): + text_from_pdf = extract_text_from_pdf(pdf_path) + chunks = chromadb.chunk(text_from_pdf) + embeddings = chromadb.embed(chunks) + similarity_scores = [] + for chunk_embed in embeddings: + similarity_scores.append(chromadb.similarity(prompt, chunk_embed)) + best_indices = sorted(range(len(similarity_scores)), key=lambda i: similarity_scores[i], reverse=True)[:top_k] + retrieved_chunks = [chunks[i] for i in best_indices] + retrieved_text = " ".join(retrieved_chunks) + generated_text = llm(prompt, context=retrieved_text, max_length=50, num_return_sequences=1) + return generated_text[0]['generated_text'] + +# Streamlit UI +def main(): + st.title("RAG with Local PDFs") + + # Prompt input + prompt = st.text_input("Enter Prompt", "") + + # PDF file upload + pdf_file = st.file_uploader("Upload PDF File", type=["pdf"]) + + if st.button("Generate Text"): + if prompt == "": + st.warning("Please enter a prompt.") + elif pdf_file is None: + st.warning("Please upload a PDF file.") + else: + # Perform RAG with the provided prompt and PDF file + generated_text = rag_with_pdf(prompt, pdf_file, llm, chromadb) + st.subheader("Generated Text") + st.write(generated_text) + +if __name__ == "__main__": + main() From 59c450e03a6ef163f20ccbb4ed113365b20c2bee Mon Sep 17 00:00:00 2001 From: Brent Laster Date: Mon, 5 Aug 2024 16:33:40 -0400 Subject: [PATCH 13/14] Create rag_ui2.py --- misc/rag_ui2.py | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 misc/rag_ui2.py diff --git a/misc/rag_ui2.py b/misc/rag_ui2.py new file mode 100644 index 0000000..14f8191 --- /dev/null +++ b/misc/rag_ui2.py @@ -0,0 +1,50 @@ +import streamlit as st +import PyPDF2 +from transformers import pipeline +from chromadb import chromadb +from chromadb.utils.embedding_functions import 
SentenceTransformerEmbeddingFunction +from tempfile import NamedTemporaryFile +from langchain.text_splitter import RecursiveCharacterTextSplitter, SentenceTransformersTokenTextSplitter +from langchain_community.llms import Ollama + + +# Initialize the local language model (LLM) for text generation + +llm = Ollama(model="mistral", base_url="http://127.0.0.1:11434") + +# Initialize ChromaDB for chunking and embedding +chromadb = chromadb.Client() + +# Function to perform Retrieval-Augmented Generation (RAG) with PDFs +def rag_with_pdf(prompt, pdf_path, llm, chromadb, top_k=5): + loader = PyPDFLoader(pdf _path) + document = loader.load() + text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) + chunked_documents = text_splitter.split_documents(document) + + Chroma.from_documents( + documents=chunked_documents, + embedding=embedding_function, + collection_name=os.getenv("CHROMA_COLLECTION_NAME"), + client=chroma_client, + ) + print(f"Added {len(chunked_documents)} chunks to chroma db") + + chroma_client = chromadb.HttpClient(host=os.getenv("CHROMA_HOST"), port=int(os.getenv("CHROMA_PORT")), settings=Settings()) + embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2") + +# embedding_function = SentenceTransformerEmbeddingFunction() +# print(embedding_function([token_split_texts[10]])) + + chroma_collection = chroma_client.get_or_create_collection("CHROMA_COLLECTION_NAME", embedding_function=embedding_function) + + results = collection.query( + query_texts=["What are the forecasts for 2024?"], + n_results=2 + ) + + print(results) + + +if __name__ == "__main__": + main() From 8736f3a34e5a2fe926409a4934ef41ab68d771b5 Mon Sep 17 00:00:00 2001 From: Brent Laster Date: Mon, 5 Aug 2024 16:34:11 -0400 Subject: [PATCH 14/14] Create rag_ui3.py --- misc/rag_ui3.py | 91 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 misc/rag_ui3.py diff --git a/misc/rag_ui3.py b/misc/rag_ui3.py new file mode 100644 index 0000000..e534edb --- /dev/null +++ b/misc/rag_ui3.py @@ -0,0 +1,91 @@ +import streamlit as st +import PyPDF2 +from transformers import pipeline +from chromadb import chromadb +from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction +from tempfile import NamedTemporaryFile +from langchain.text_splitter import RecursiveCharacterTextSplitter, SentenceTransformersTokenTextSplitter +from langchain_community.llms import Ollama + + +# Initialize the local language model (LLM) for text generation + +llm = Ollama(model="mistral", base_url="http://127.0.0.1:11434") + +# Initialize ChromaDB for chunking and embedding +chromadb = chromadb.Client() + +# Function to extract text from a PDF file +def extract_text_from_pdf(pdf_path): + text = "" + with NamedTemporaryFile(dir='.', suffix='.pdf') as f: + f.write(pdf_path.getbuffer()) + with open(f.name, "rb") as file: + reader = PyPDF2.PdfReader(f.name) + for page_num in range(len(reader.pages)): + text += reader.pages[page_num].extract_text() + f.close() + return text + +# Function to perform Retrieval-Augmented Generation (RAG) with PDFs +def rag_with_pdf(prompt, pdf_path, llm, chromadb, top_k=5): + text_from_pdf = extract_text_from_pdf(pdf_path) + # print(word_wrap(text_from_pdf)) + print(text_from_pdf) + character_splitter = RecursiveCharacterTextSplitter( + separators=["\n\n", "\n", ". 
", " ", ""], + chunk_size=1000, + chunk_overlap=0 + ) + character_split_texts = character_splitter.split_text('\n\n'.join(text_from_pdf )) + + # print(word_wrap(character_split_texts[10])) + print(character_split_texts) + print(f"\nTotal chunks: {len(character_split_texts)}") + token_splitter = SentenceTransformersTokenTextSplitter(chunk_overlap=0, tokens_per_chunk=256) + + token_split_texts = [] + for text in character_split_texts: + token_split_texts += token_splitter.split_text(text) + + # print(word_wrap(token_split_texts[10])) + print(token_split_texts[10]) + print(f"\nTotal chunks: {len(token_split_texts)}") + + embedding_function = SentenceTransformerEmbeddingFunction() + print(embedding_function([token_split_texts[10]])) + + chroma_collection = chromadb.get_or_create_collection("example", embedding_function=embedding_function) + + ids = [str(i) for i in range(len(token_split_texts))] + + chroma_collection.add(ids=ids, documents=token_split_texts) + chroma_collection.count() + results = chroma_collection.query(query_texts=[prompt], n_results=5) + retrieved_documents = results['documents'][0] + + return results['documents'][0] + +# Streamlit UI +def main(): + st.title("RAG with Local PDFs") + + # Prompt input + prompt = st.text_input("Enter Prompt", "") + + # PDF file upload + pdf_file = st.file_uploader("Upload PDF File", type=["pdf"]) + + if st.button("Generate Text"): + if prompt == "": + st.warning("Please enter a prompt.") + elif pdf_file is None: + st.warning("Please upload a PDF file.") + else: + # Perform RAG with the provided prompt and PDF file + generated_text = rag_with_pdf(prompt, pdf_file, llm, chromadb) + st.subheader("Generated Text") + st.write(generated_text) + +if __name__ == "__main__": + main()