Skip to content

Commit 72a828f

Browse files
committed
New Freebie: Local-RAG
1 parent e062879 commit 72a828f

File tree

8 files changed

+136
-50
lines changed

8 files changed

+136
-50
lines changed

freebies/local-rag/README.md

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,27 @@ This is a Retrieval-Augmented Generation (RAG) chatbot application using Langcha
1212
## Setup
1313

1414
1. Clone this repository:
15-
```
16-
git clone https://github.com/yourusername/rag-chatbot.git
17-
cd rag-chatbot
18-
```
15+
```
16+
git clone https://github.com/yourusername/rag-chatbot.git
17+
cd rag-chatbot
18+
```
1919

2020
2. Run the setup script:
21-
```
22-
python setup.py
23-
```
24-
This will download necessary files, install dependencies, and set up Ollama.
21+
22+
```bash
23+
python setup.py
24+
```
25+
This will download necessary files, install dependencies, and set up Ollama.
2526

2627
3. Activate the virtual environment:
27-
- On Windows: `.\venv\Scripts\activate`
28-
- On macOS/Linux: `source venv/bin/activate`
28+
- On Windows: `.\venv\Scripts\activate`
29+
- On macOS/Linux: `source venv/bin/activate`
2930

3031
4. Run the application:
31-
```
32-
python app.py
33-
```
32+
33+
```bash
34+
python app.py
35+
```
3436

3537
5. Open your web browser and go to `http://localhost:7860` to interact with the chatbot.
3638

@@ -59,14 +61,11 @@ vector_store = get_vector_store()
5961
add_documents_to_store(vector_store, new_chunks)
6062
```
6163

62-
## Troubleshooting
63-
64+
### Troubleshooting
6465
If you encounter any issues, please check the application logs or file an issue on the GitHub repository.
6566

66-
## Contributing
67-
67+
### Contributing
6868
Contributions are welcome! Please feel free to submit a Pull Request.
6969

70-
## License
71-
72-
This project is licensed under the MIT License.
70+
### License
71+
This project is licensed under the GPL-3 license.

freebies/local-rag/app.py

Lines changed: 106 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1-
"""
2-
Main application script for the RAG Chatbot.
3-
"""
4-
51
import gradio as gr
62
import logging
7-
from chatbot import chat_ollama
8-
from data_loader import get_document_chunks
9-
from vector_store import get_vector_store, VectorStoreError
3+
from scrapegraphai.graphs import SmartScraperGraph
4+
from langchain.document_loaders import UnstructuredMarkdownLoader
5+
from langchain.text_splitter import RecursiveCharacterTextSplitter
6+
from langchain.vectorstores import Chroma
7+
from langchain.embeddings import OllamaEmbeddings
8+
from langchain.schema import StrOutputParser
9+
from langchain.schema.runnable import RunnablePassthrough
1010
from config import CONFIG
11+
from vector_store import VectorStoreError, get_vector_store
12+
from chatbot import chat_ollama
1113

1214
# Set up logging
1315
logging.basicConfig(
@@ -16,6 +18,80 @@
1618
)
1719
logger = logging.getLogger(__name__)
1820

21+
def scrape_with_scrapegraphai(url):
    """Scrape a web page with ScrapeGraphAI and save its text as markdown.

    Builds a ``SmartScraperGraph`` pointed at the Ollama server named in
    ``CONFIG`` and asks it to extract all text content from ``url``. The
    extracted text is written to ``scraped_content.md`` in the current
    working directory, overwriting any earlier scrape.

    Args:
        url: Address of the page to scrape.

    Returns:
        Path of the markdown file that was written.

    Raises:
        KeyError: If the scraper result has no ``'content'`` entry.
    """
    graph_config = {
        "llm": {
            "model": CONFIG['OLLAMA_MODEL'],
            "temperature": 0,
            "format": "json",
            "base_url": CONFIG['OLLAMA_URL'],
        },
        "embeddings": {
            "model": "ollama/nomic-embed-text",
            "base_url": CONFIG['OLLAMA_URL'],
        },
        "verbose": True,
    }
    smart_scraper_graph = SmartScraperGraph(
        prompt="Extract all the text content",
        source=url,
        config=graph_config,
    )
    result = smart_scraper_graph.run()

    content = result['content']
    # The shape of 'content' is decided by the LLM's JSON answer. A bare
    # string must be wrapped, otherwise iterating it would write one
    # character per line.
    if isinstance(content, str):
        content = [content]

    markdown_path = "scraped_content.md"
    with open(markdown_path, "w", encoding="utf-8") as file:
        # Format each item instead of concatenating so that non-string
        # entries (numbers, nested values) do not raise TypeError.
        file.writelines(f"{item}\n" for item in content)

    return markdown_path
48+
49+
def ingest_markdown(markdown_path):
    """Index a markdown file in Chroma and build a RAG chain over it.

    The file is loaded with ``UnstructuredMarkdownLoader``, split into chunks
    using ``CONFIG['CHUNK_SIZE']`` / ``CONFIG['CHUNK_OVERLAP']``, embedded
    through Ollama and stored in a Chroma collection persisted at
    ``CONFIG['CHROMA_PATH']``.

    Args:
        markdown_path: Path of the markdown file to ingest.

    Returns:
        A ``(vectorstore, rag_chain)`` tuple. ``rag_chain.invoke(question)``
        retrieves matching chunks, fills the prompt and returns the model's
        answer as a string.
    """
    # Local import: the module-level imports do not bring in a prompt class.
    from langchain.prompts import PromptTemplate

    docs = UnstructuredMarkdownLoader(markdown_path).load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=CONFIG['CHUNK_SIZE'],
        chunk_overlap=CONFIG['CHUNK_OVERLAP'],
    )
    splits = text_splitter.split_documents(docs)

    # NOTE(review): embeddings use the chat model (OLLAMA_MODEL) while the
    # scraper uses "nomic-embed-text" -- confirm this is intended.
    embeddings = OllamaEmbeddings(
        base_url=CONFIG['OLLAMA_URL'],
        model=CONFIG['OLLAMA_MODEL'],
    )
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embeddings,
        persist_directory=CONFIG['CHROMA_PATH'],
    )
    retriever = vectorstore.as_retriever()

    # A plain str cannot be piped into a chain (dict | str raises TypeError);
    # it has to be a prompt template exposing the two keys produced above.
    prompt_template = PromptTemplate.from_template(
        "Answer the question using only the context below.\n\n"
        "Context:\n{context}\n\n"
        "Question: {question}"
    )

    def format_docs(docs):
        """Join the retrieved documents' text into one context string."""
        return "\n\n".join(doc.page_content for doc in docs)

    def ask_ollama(prompt_value):
        """Adapt chat_ollama(message, history) to a one-argument chain step."""
        # History is passed empty -- presumably a list of prior turns; verify
        # against chat_ollama's implementation.
        return chat_ollama(prompt_value.to_string(), [])

    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt_template
        | ask_ollama  # Using local Ollama model for processing
        | StrOutputParser()
    )

    return vectorstore, rag_chain
76+
77+
def ingest_url(url):
    """Scrape ``url`` and load the result into the vector store.

    Never raises: any exception from scraping or ingestion is logged and
    reported through the returned status message.

    Args:
        url: Address of the page to ingest.

    Returns:
        A ``(status, vectorstore, rag_chain)`` tuple. On failure the last two
        items are ``None`` and ``status`` carries the error text.
    """
    try:
        md_file = scrape_with_scrapegraphai(url)
        store, chain = ingest_markdown(md_file)
    except Exception as exc:
        logger.error(f"Error ingesting content from {url}: {str(exc)}")
        return f"Failed to ingest content from {url}: {str(exc)}", None, None
    else:
        return "Content ingested successfully!", store, chain
86+
87+
def query_vectorstore(query, rag_chain):
    """Answer ``query`` with the given RAG chain.

    Args:
        query: The user's question, passed unchanged to ``rag_chain.invoke``.
        rag_chain: Chain object exposing ``invoke``, or ``None`` when nothing
            has been ingested yet.

    Returns:
        Whatever ``rag_chain.invoke(query)`` returns, or the fixed message
        ``"Vector store not initialized."`` when ``rag_chain`` is ``None``.
    """
    # Compare against None explicitly: a chain object that happens to be
    # falsy (e.g. defines __len__ or __bool__) is still a usable chain.
    if rag_chain is None:
        return "Vector store not initialized."
    return rag_chain.invoke(query)
94+
1995
def main():
2096
"""Initialize and run the RAG Chatbot application."""
2197
try:
@@ -27,20 +103,30 @@ def main():
27103
logger.info("Loading existing vector database...")
28104
get_vector_store()
29105

30-
gradio_interface = gr.ChatInterface(
31-
chat_ollama,
32-
chatbot=gr.Chatbot(),
33-
textbox=gr.Textbox(placeholder="Example: Who is Alice?", container=False, scale=7),
34-
title="The Ollama RAG Chatbot",
35-
description=f"Ask the {CONFIG['OLLAMA_MODEL']} chatbot a question!",
36-
theme='gradio/base',
37-
retry_btn=None,
38-
undo_btn="Delete Previous",
39-
clear_btn="Clear",
40-
)
106+
with gr.Blocks() as gradio_interface:
107+
gr.Markdown("## The Ollama RAG Chatbot")
108+
with gr.Row():
109+
with gr.Column():
110+
url_input = gr.Textbox(label="Enter URL to Ingest", placeholder="Example: https://example.com/article")
111+
ingest_button = gr.Button("Ingest URL")
112+
ingestion_status = gr.Textbox(label="Ingestion Status", interactive=False)
113+
chatbot = gr.Chatbot()
114+
user_input = gr.Textbox(placeholder="Example: Who is Alice?", container=False, scale=7)
115+
send_button = gr.Button("Send")
116+
117+
def ingest_callback(url):
118+
status, vectorstore, rag_chain = ingest_url(url)
119+
return status, vectorstore, rag_chain
120+
121+
def query_callback(user_input, rag_chain):
122+
response = query_vectorstore(user_input, rag_chain)
123+
return response
124+
125+
ingest_button.click(ingest_callback, inputs=url_input, outputs=[ingestion_status, chatbot, None])
126+
send_button.click(query_callback, inputs=user_input, outputs=chatbot)
41127

42128
logger.info("Starting Gradio interface...")
43-
gradio_interface.launch()
129+
gradio_interface.launch(server_name="0.0.0.0", server_port=7860, inbrowser=True)
44130
except VectorStoreError as e:
45131
logger.error(f"Vector store error: {str(e)}")
46132
print(f"An error occurred with the vector store: {str(e)}")
@@ -49,4 +135,4 @@ def main():
49135
print(f"An unexpected error occurred: {str(e)}")
50136

51137
if __name__ == "__main__":
52-
main()
138+
main()

freebies/local-rag/chatbot.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,4 +75,4 @@ def chat_ollama(message, history):
7575
return "I'm sorry, but I'm having trouble generating a response right now."
7676
except Exception as e:
7777
logger.error(f"Unexpected error in chat_ollama: {str(e)}")
78-
return "I apologize, but I'm experiencing an unexpected issue. Please try again later."
78+
return "I apologize, but I'm experiencing an unexpected issue. Please try again later."

freebies/local-rag/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,4 @@
3333

3434
# Application settings
3535
'DEBUG': os.getenv('DEBUG', 'False').lower() == 'true',
36-
}
36+
}

freebies/local-rag/data_loader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,4 @@ def add_new_document(file_path):
6363
return chunks
6464
except Exception as e:
6565
logger.error(f"Error adding new document {file_path}: {str(e)}")
66-
raise
66+
raise

freebies/local-rag/llm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,4 @@ def get_ollama():
2121
return ollama
2222
except Exception as e:
2323
logger.error(f"Error initializing Ollama: {str(e)}")
24-
raise OllamaError(f"Failed to initialize Ollama: {str(e)}")
24+
raise OllamaError(f"Failed to initialize Ollama: {str(e)}")

freebies/local-rag/setup.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
"""
22
Setup script for the RAG Chatbot application.
3-
This script downloads necessary files from the NovaSystem repository and sets up the application.
3+
This script downloads necessary files from the specified repository and sets up the application.
44
"""
55

66
import os
77
import subprocess
88
import requests
99

10-
REPO_OWNER = 'ctavolazzi'
11-
REPO_NAME = 'NovaSystem'
10+
REPO_OWNER = 'yourusername'
11+
REPO_NAME = 'rag-chatbot'
1212
BRANCH = 'main'
1313
FOLDER_PATH = 'freebies/local-rag'
1414
FILES_TO_DOWNLOAD = [
@@ -18,7 +18,8 @@
1818
'vector_store.py',
1919
'llm.py',
2020
'chatbot.py',
21-
'requirements.txt'
21+
'requirements.txt',
22+
'README.md'
2223
]
2324

2425
def download_file(file_name):
@@ -83,4 +84,4 @@ def main():
8384
print("2. Run the application: python app.py")
8485

8586
if __name__ == "__main__":
86-
main()
87+
main()

freebies/local-rag/vector_store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,4 @@ def add_documents_to_store(vector_store, new_chunks):
5757
logger.info(f"Added {len(new_chunks)} new chunks to the vector store")
5858
except Exception as e:
5959
logger.error(f"Error adding new documents to vector store: {str(e)}")
60-
raise VectorStoreError(f"Failed to add new documents to vector store: {str(e)}")
60+
raise VectorStoreError(f"Failed to add new documents to vector store: {str(e)}")

0 commit comments

Comments
 (0)