- """
- Main application script for the RAG Chatbot.
- """
-
import gradio as gr
import logging
- from chatbot import chat_ollama
- from data_loader import get_document_chunks
- from vector_store import get_vector_store, VectorStoreError
+ from scrapegraphai.graphs import SmartScraperGraph
+ from langchain.document_loaders import UnstructuredMarkdownLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.vectorstores import Chroma
+ from langchain.embeddings import OllamaEmbeddings
+ from langchain.prompts import ChatPromptTemplate
+ from langchain.schema import StrOutputParser
+ from langchain.schema.runnable import RunnablePassthrough
from config import CONFIG
+ from vector_store import VectorStoreError, get_vector_store
+ from chatbot import chat_ollama

# Set up logging
logging.basicConfig(
)
logger = logging.getLogger(__name__)

+ def scrape_with_scrapegraphai(url):
+     """Scrape content from a URL using the ScrapeGraphAI library."""
+     graph_config = {
+         "llm": {
+             "model": CONFIG['OLLAMA_MODEL'],
+             "temperature": 0,
+             "format": "json",
+             "base_url": CONFIG['OLLAMA_URL'],
+         },
+         "embeddings": {
+             "model": "ollama/nomic-embed-text",
+             "base_url": CONFIG['OLLAMA_URL'],
+         },
+         "verbose": True,
+     }
+     smart_scraper_graph = SmartScraperGraph(
+         prompt="Extract all the text content",
+         source=url,
+         config=graph_config
+     )
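+     # Running the graph fetches the page and extracts text via the LLM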
+     result = smart_scraper_graph.run()
+     # Write the scraped text to a markdown file for ingestion; this
+     # assumes the scraper returns a dict with a 'content' list.
+     markdown_path = "scraped_content.md"
+     with open(markdown_path, "w", encoding="utf-8") as file:
+         for item in result['content']:
+             file.write(item + "\n")
+
+     return markdown_path
+
+ def ingest_markdown(markdown_path):
+     """Ingest the markdown content into the vector store."""
+     loader = UnstructuredMarkdownLoader(markdown_path)
+     docs = loader.load()
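+
+     # Split into overlapping chunks so retrieval returns focused passages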
+     chunk_size = CONFIG['CHUNK_SIZE']
+     chunk_overlap = CONFIG['CHUNK_OVERLAP']
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+     splits = text_splitter.split_documents(docs)
+
+     embeddings = OllamaEmbeddings(base_url=CONFIG['OLLAMA_URL'], model=CONFIG['OLLAMA_MODEL'])
+     vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory=CONFIG['CHROMA_PATH'])
+     retriever = vectorstore.as_retriever()
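+     # Note: chunks are embedded with the chat model here, while the scraper
+     # config above names a dedicated embedding model.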
+
+     # Placeholder prompt; substitute your local prompt template here
+     prompt_template = ChatPromptTemplate.from_template(
+         "Answer the question using only the following context:\n\n"
+         "{context}\n\nQuestion: {question}"
+     )
+
+     def format_docs(docs):
+         return "\n\n".join(doc.page_content for doc in docs)
+
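+     # LCEL pipeline: retrieve -> format context -> fill prompt -> LLM -> string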
+     rag_chain = (
+         {"context": retriever | format_docs, "question": RunnablePassthrough()}
+         | prompt_template
+         | chat_ollama  # using the local Ollama model for generation
+         | StrOutputParser()
+     )
+
+     return vectorstore, rag_chain
+
+ def ingest_url(url):
+     """Ingest content from the URL into the vector store."""
+     try:
+         markdown_path = scrape_with_scrapegraphai(url)
+         vectorstore, rag_chain = ingest_markdown(markdown_path)
+         return "Content ingested successfully!", vectorstore, rag_chain
+     except Exception as e:
+         logger.error(f"Error ingesting content from {url}: {str(e)}")
+         return f"Failed to ingest content from {url}: {str(e)}", None, None
+
+ def query_vectorstore(query, rag_chain):
+     """Run a query through the RAG chain, if one has been built."""
+     if rag_chain:
+         response = rag_chain.invoke(query)
+         return response
+     else:
+         return "Vector store not initialized."
+
def main():
    """Initialize and run the RAG Chatbot application."""
    try:
@@ -27,20 +103,30 @@ def main():
        logger.info("Loading existing vector database...")
        get_vector_store()

-        gradio_interface = gr.ChatInterface(
-            chat_ollama,
-            chatbot=gr.Chatbot(),
-            textbox=gr.Textbox(placeholder="Example: Who is Alice?", container=False, scale=7),
-            title="The Ollama RAG Chatbot",
-            description=f"Ask the {CONFIG['OLLAMA_MODEL']} chatbot a question!",
-            theme='gradio/base',
-            retry_btn=None,
-            undo_btn="Delete Previous",
-            clear_btn="Clear",
-        )
+        with gr.Blocks() as gradio_interface:
+            gr.Markdown("## The Ollama RAG Chatbot")
+            with gr.Row():
+                with gr.Column():
+                    url_input = gr.Textbox(label="Enter URL to Ingest", placeholder="Example: https://example.com/article")
+                    ingest_button = gr.Button("Ingest URL")
+                    ingestion_status = gr.Textbox(label="Ingestion Status", interactive=False)
+            chatbot = gr.Chatbot()
+            user_input = gr.Textbox(placeholder="Example: Who is Alice?", container=False, scale=7)
+            send_button = gr.Button("Send")
+            # Session state holds the vector store and RAG chain built at ingestion
+            vectorstore_state = gr.State()
+            rag_chain_state = gr.State()
+
+            def ingest_callback(url):
+                status, vectorstore, rag_chain = ingest_url(url)
+                return status, vectorstore, rag_chain
+
+            def query_callback(user_input, history, rag_chain):
+                response = query_vectorstore(user_input, rag_chain)
+                return (history or []) + [(user_input, response)]
+
+            ingest_button.click(ingest_callback, inputs=url_input, outputs=[ingestion_status, vectorstore_state, rag_chain_state])
+            send_button.click(query_callback, inputs=[user_input, chatbot, rag_chain_state], outputs=chatbot)

        logger.info("Starting Gradio interface...")
-        gradio_interface.launch()
+        gradio_interface.launch(server_name="0.0.0.0", server_port=7860, inbrowser=True)
    except VectorStoreError as e:
        logger.error(f"Vector store error: {str(e)}")
        print(f"An error occurred with the vector store: {str(e)}")
@@ -49,4 +135,4 @@ def main():
        print(f"An unexpected error occurred: {str(e)}")

if __name__ == "__main__":
-    main()
+    main()