diff --git a/.bin/files.json b/.bin/files.json new file mode 100644 index 0000000..222a766 --- /dev/null +++ b/.bin/files.json @@ -0,0 +1 @@ +[{"id": "file-gwWAHylwSsbgiGtgueJXDQy7", "bytes": 22887384, "created_at": 1701634508, "filename": "10dayMBA - 1.pdf", "object": "file", "purpose": "assistants", "status": "processed", "status_details": null}, {"id": "file-F1Up2lgzKDRblggN0NBpBxjE", "bytes": 13464, "created_at": 1701634510, "filename": "s1w1 notes.docx", "object": "file", "purpose": "assistants", "status": "processed", "status_details": null}, {"id": "file-NUzY5dlGyYfneoeUhKgwPnRt", "bytes": 13875, "created_at": 1701634512, "filename": "s2w1 notes.docx", "object": "file", "purpose": "assistants", "status": "processed", "status_details": null}] \ No newline at end of file diff --git a/.github/workflows/development_timetostudybuddy.yml b/.github/workflows/development_timetostudybuddy.yml new file mode 100644 index 0000000..d4275e7 --- /dev/null +++ b/.github/workflows/development_timetostudybuddy.yml @@ -0,0 +1,67 @@ +# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy +# More GitHub Actions for Azure: https://github.com/Azure/actions +# More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions + +name: Build and deploy Python app to Azure Web App - timetostudybuddy + +on: + push: + branches: + - development + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python version + uses: actions/setup-python@v1 + with: + python-version: '3.9' + + - name: Create and start virtual environment + run: | + python -m venv venv + source venv/bin/activate + + - name: Install dependencies + run: pip install -r requirements.txt + + # Optional: Add step to run tests here (PyTest, Django test suites, etc.) 
+ - name: Zip artifact for deployment + run: zip release.zip ./* -r + + - name: Upload artifact for deployment jobs + uses: actions/upload-artifact@v3 + with: + name: python-app + path: | + release.zip + !venv/ + + deploy: + runs-on: ubuntu-latest + needs: build + environment: + name: 'Production' + url: ${{ steps.deploy-to-webapp.outputs.webapp-url }} + + steps: + - name: Download artifact from build job + uses: actions/download-artifact@v3 + with: + name: python-app + + - name: Unzip artifact for deployment + run: unzip release.zip + + - name: 'Deploy to Azure Web App' + uses: azure/webapps-deploy@v2 + id: deploy-to-webapp + with: + app-name: 'timetostudybuddy' + slot-name: 'Production' + publish-profile: ${{ secrets.AZUREAPPSERVICE_PUBLISHPROFILE_55AB58D01B2644DE951E25FDE92530B2 }} diff --git a/SQL Scripts/Objects.sql b/SQL Scripts/Objects.sql index b74622e..5afae8c 100644 --- a/SQL Scripts/Objects.sql +++ b/SQL Scripts/Objects.sql @@ -34,3 +34,11 @@ CREATE TABLE STUDYBUDDY.FAQs ( CONSTRAINT FK_FAQs_Class FOREIGN KEY (class_id) REFERENCES master.STUDYBUDDY.Classes(class_id), CONSTRAINT FK_FAQs_User FOREIGN KEY (user_id) REFERENCES STUDYBUDDY.Users(user_id) ); + +CREATE TABLE master.STUDYBUDDY.Modules ( + module_id int IDENTITY(1,1) NOT NULL, + module_name nvarchar(100) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL, + class_id int NOT NULL, + CONSTRAINT PK__Modules__ModuleID PRIMARY KEY (module_id), + CONSTRAINT FK_ClassModule FOREIGN KEY (class_id) REFERENCES master.STUDYBUDDY.Classes(class_id) +); \ No newline at end of file diff --git a/Scripts/__chatscreen.py b/Scripts/__chatscreen.py new file mode 100644 index 0000000..4efcb1c --- /dev/null +++ b/Scripts/__chatscreen.py @@ -0,0 +1,181 @@ +# **************************************************************************** # +# # +# ::: :::::::: # +# __chatscreen.py :+: :+: :+: # +# +:+ +:+ +:+ # +# By: ammar syed ali INITIAL PROMPT +You're chatting with {st.session_state.user_info['username']}\n +Their role is : 
{st.session_state.user_info['role']}\n +The class is : {st.session_state.class_info['class_name']}\n +The class learning outcomes are:\n {class_learning_outcomes}\n +You are going to discuss the following modules:\n +""" + + for module, outcome in module_learning_outcomes.items(): + initial_prompt += f" -Module: {module}\n\n" + initial_prompt += f" -Learning outcomes: {outcome}\n\n" + + initial_prompt += f"Here is info on the files you recieved:\n\n{st.session_state.blobs_to_retrieve} \n\n" + + initial_prompt += f"Here are the FAQs for this class:\n\n{faq_df}" + + return initial_prompt + + +def upload_files_ai(blob_paths): + blob_service_client = BlobServiceClient.from_connection_string(os.getenv("AZURE_STORAGE_CONNECTION_STRING")) + container_client = blob_service_client.get_container_client(os.getenv("AZURE_CONTAINER")) + client = st.session_state.ai_client + + # Base directory (assuming this script is in the /scripts subdirectory) + base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + # Paths + staging_dir = os.path.join(base_dir, 'staging') + json_path = os.path.join(base_dir, '.bin', 'files.json') + + # Ensure directories exist + os.makedirs(staging_dir, exist_ok=True) + os.makedirs(os.path.dirname(json_path), exist_ok=True) + + # List to store file objects + uploaded_files = [] + + for blob_path in blob_paths: + # Adjust the path to save in the staging directory + staging_path = os.path.join(staging_dir, os.path.basename(blob_path)) + + # Download the file from Azure Blob + blob_client = container_client.get_blob_client(blob_path) + with open(staging_path, "wb") as download_file: + download_file.write(blob_client.download_blob().readall()) + + # Upload the file to OpenAI + with open(staging_path, "rb") as file: + response = client.files.create(file=file, purpose="assistants") + uploaded_files.append(response) + + # Delete the file from the staging directory + os.remove(staging_path) + + # Return this list of file ids + return 
[file_obj.id for file_obj in uploaded_files] \ No newline at end of file diff --git a/Scripts/__classmanager.py b/Scripts/__classmanager.py new file mode 100644 index 0000000..8beb35d --- /dev/null +++ b/Scripts/__classmanager.py @@ -0,0 +1,74 @@ +# **************************************************************************** # +# # +# ::: :::::::: # +# __classmanager.py :+: :+: :+: # +# +:+ +:+ +:+ # +# By: ammar syed ali Tuple[List[str], str]: - pdf = PdfReader(file) - output = [] - for page in pdf.pages: - text = page.extract_text() - text = re.sub(r"(\w+)-\n(\w+)", r"\1\2", text) - text = re.sub(r"(? List[Document]: - if isinstance(text, str): - text = [text] - page_docs = [Document(page_content=page) for page in text] - for i, doc in enumerate(page_docs): - doc.metadata["page"] = i + 1 - - doc_chunks = [] - for doc in page_docs: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=4000, - separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""], - chunk_overlap=0, - ) - chunks = text_splitter.split_text(doc.page_content) - for i, chunk in enumerate(chunks): - doc = Document( - page_content=chunk, metadata={"page": doc.metadata["page"], "chunk": i} - ) - doc.metadata["source"] = f"{doc.metadata['page']}-{doc.metadata['chunk']}" - doc.metadata["filename"] = filename # Add filename to metadata - doc_chunks.append(doc) - return doc_chunks - -def upload_file(): - if st.button("Upload File"): - st.session_state.show_upload_file = not st.session_state.show_upload_file - - if st.session_state.show_upload_file: - pdf_files = st.file_uploader("Upload your files here", - accept_multiple_files=True, - help="Only .pdf files only please", - type='pdf') - if st.button("Submit"): - # Display a warning if the user hasn't uploaded a file - if not pdf_files: - st.warning("Please upload a file first!") - +if st.session_state.user_info['role'] == 'teacher': + help_text = 'Upload your class materials here!' 
+else: + help_text = 'Any files, like notes or outlines' + +def upload_class_file(): + files = st.file_uploader("Upload files relevant to the class as a whole ie syllabus, schedule, etc", + accept_multiple_files=True, + help=help_text, + key = st.session_state.upload_key) + if st.button("Submit", key='class_upload_submit', use_container_width=True): + # Display a warning if the user hasn't uploaded a file + if not files: + st.warning("Please upload a file first!") + else: with st.spinner("Uploading your files..."): - pdf_names = [file.name for file in pdf_files] # get the names for each file + for file in files: + file_stream = BytesIO(file.getvalue()) + blob_name = st.session_state.class_info['class_name'] + '/' + file.name + azb.upload_file_to_blob(file_stream, blob_name) - # Create the index if it doesn't exist - if st.session_state.class_info['index_name'] is None: - index_name = create_class_index() - - # Upload the document to Azure Cognitive Search - #1 Parse the PDF - documents = [] - for pdf_file, pdf_name in zip(pdf_files, pdf_names): - text, filename = parse_pdf(pdf_file, pdf_name) - documents = documents + text_to_docs(text, filename) - st.write(f"Here is the documents: {documents}") - #2 Upload the documents to Azure Cognitive Search - embeddings=OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'), - model="text-embedding-ada-002", - chunk_size=1000) - - acs = AzureSearch(azure_search_endpoint=os.getenv('AZURE_AI_SEARCH_ENDPOINT'), - azure_search_key=os.getenv('AZURE_AI_SEARCH_API_KEY'), - index_name=st.session_state.class_info['index_name'], - embedding_function=embeddings.embed_query) + # Reset the file uploader widget + st.session_state.upload_key = str(randint(0, 1000000)) + st.rerun() + +def upload_module_file(): + files = st.file_uploader("Upload files your files for this module", + accept_multiple_files=True, + help=help_text, + key = "fufuf" + st.session_state.upload_key_2) + if st.button("Submit", use_container_width=True): + # Display 
a warning if the user hasn't uploaded a file + if not files: + st.warning("Please upload a file first!") + else: + with st.spinner("Uploading your files..."): + for file in files: + file_stream = BytesIO(file.getvalue()) + if st.session_state.user_info['role'] == 'teacher': + blob_name = st.session_state.class_info['class_name'] + '/' + st.session_state.selected_module_name + '/' + file.name + else: + blob_name = st.session_state.class_info['class_name'] + '/' + st.session_state.selected_module_name + '/STUDENT_NOTES/' + st.session_state.user_info['username'] + '/' + file.name + azb.upload_file_to_blob(file_stream, blob_name) - acs.add_documents(documents=documents) - - \ No newline at end of file + # Reset the file uploader widget + st.session_state.upload_key_2 = str(randint(1000001, 10000000)) + st.rerun() diff --git a/Scripts/__login.py b/Scripts/__login.py index 7d6a861..bf9c4f6 100644 --- a/Scripts/__login.py +++ b/Scripts/__login.py @@ -16,13 +16,14 @@ def signup(): st.subheader("Sign Up") - + st.info("Please fill in the details below to create a new account.") # User details input new_username = st.text_input("Create a new username") new_password = st.text_input("Create a password", type="password") email = st.text_input("Enter your email") school = st.text_input("Enter your school") + st.info("Students must have a class code to join classes. 
Only teachers can create classes") # Role selection using radio buttons role = st.radio("Select your role", ["student", "teacher"]) @@ -52,7 +53,7 @@ def login(): def LoginContainer(): # Main application # Create a dropdown to select action (Sign Up or Log In) - + st.info("Trying to demo the app?\n\n Use pre-made accounts found here: https://github.com/ammarali0416/StudyBuddy") selected_action = st.selectbox("Select an action:", ["Log In", "Sign Up"], key="login_selectbox") if selected_action == "Sign Up": diff --git a/Scripts/__modules.py b/Scripts/__modules.py new file mode 100644 index 0000000..de4431e --- /dev/null +++ b/Scripts/__modules.py @@ -0,0 +1,78 @@ +# **************************************************************************** # +# # +# ::: :::::::: # +# __modules.py :+: :+: :+: # +# +:+ +:+ +:+ # +# By: ammar syed ali 1 and "." not in parts[1] else None + student_name = parts[3] if len(parts) > 3 and "STUDENT_NOTES" in parts else None + if len(parts) > 1 and "." in parts[1]: + module_name = "CLASS_LEVEL" + df = pd.concat([df, pd.DataFrame({'full_path': path, 'module_name': module_name, 'student_name': student_name}, index=[0])], ignore_index=True) + ## when a fileis a the class level make the module name CLASS_LEVEL + ## filter the data fram checking the blob path for two + st.session_state.blobs_df = df \ No newline at end of file diff --git a/Scripts/azsqldb.py b/Scripts/azsqldb.py index bb8f397..482de2d 100644 --- a/Scripts/azsqldb.py +++ b/Scripts/azsqldb.py @@ -138,7 +138,7 @@ def get_classes(user_id, role, sqlcursor): } # classes["Anam's Class"]['class_id'] would return 1]""" -def new_class(user_id, sqlcursor, class_name): +def new_class(user_id, sqlcursor, class_name, learnig_outcomes): """ Create a new class """ @@ -146,7 +146,7 @@ def new_class(user_id, sqlcursor, class_name): class_code = ''.join(random.choices('0123456789ABCDEF', k=6)) # Execute a SQL query to insert the new class - sqlcursor.execute("INSERT INTO master.STUDYBUDDY.classes 
(class_name, class_code, teacher_id) VALUES (?, ?, ?)", (class_name, class_code, user_id)) + sqlcursor.execute("INSERT INTO master.STUDYBUDDY.classes (class_name, class_code, teacher_id, LearningOutcomes) VALUES (?, ?, ?, ?)", (class_name, class_code, user_id, learnig_outcomes)) # Commit the transaction sqlcursor.connection.commit() @@ -206,6 +206,36 @@ def get_questions(class_id, sqlcursor): df = pd.DataFrame(data) return df +def get_questions_usernames(class_id, sqlcursor): + """ + Get all the questions and answers for a particular class + and return them as a Pandas DataFrame. + Instead of the user_id, return the username. + """ + # Execute a SQL query to get all the questions for the provided class_id + sqlcursor.execute("""SELECT b.username, a.question, a.answer + FROM master.STUDYBUDDY.FAQs a + LEFT JOIN master.STUDYBUDDY.Users b ON + a.user_id = b.user_id + WHERE class_id = ?""", (class_id,)) + + # Fetch all the records returned by the query + question_records = sqlcursor.fetchall() + + # Create a list of dictionaries for each record + data = [] + for record in question_records: + data.append({ + 'username': record[0], + 'question': record[1], + 'answer': record[2] + }) + + # Create and return a DataFrame from the list of dictionaries + df = pd.DataFrame(data) + return df + + def update_faqs(original_df, edited_df, sqlcursor): # Separate new questions (with None in faq_id) new_questions = edited_df[edited_df['faq_id'].isnull()] @@ -279,6 +309,17 @@ def update_faqs(original_df, edited_df, sqlcursor): # Commit the changes after deletion sqlcursor.connection.commit() +def ask_question(user_id, class_id, question, sqlcursor): + """ + Ask a question in a particular class. 
+ """ + # Execute a SQL query to insert the new question + sqlcursor.execute("INSERT INTO master.STUDYBUDDY.FAQs (class_id, user_id, question) VALUES (?, ?, ?)", (class_id, user_id, question)) + # Commit the transaction + + sqlcursor.connection.commit() + + def update_class(sqlcursor, class_id, field, new_value): """ Update the class table with the new value for the provided field @@ -295,6 +336,83 @@ def update_class(sqlcursor, class_id, field, new_value): # Commit the changes sqlcursor.connection.commit() +def get_modules(class_id, sqlcursor): + """ + Get all the modules associated with a particular class + Returns a dictionary mapping module names to their module IDs + """ + # Execute SQL query to get all modules for the provided class_id + sqlcursor.execute(""" + SELECT module_id, module_name + FROM master.STUDYBUDDY.Modules + WHERE class_id = ? + """, (class_id,)) + + # Fetch all records from the query + module_records = sqlcursor.fetchall() + + # Create a dictionary mapping module names to their IDs + module_info_mapping = {record[1]: record[0] for record in module_records} + + return module_info_mapping + +def new_module(class_id, module_name, learning_outcome, sqlcursor): + """ + Create a new module for a specific class. + """ + # Execute a SQL query to insert the new module + sqlcursor.execute(""" + INSERT INTO master.STUDYBUDDY.Modules (class_id, module_name, LearningOutcomes) + VALUES (?, ?, ?) + """, (class_id, module_name, learning_outcome)) + # Commit the transaction + sqlcursor.connection.commit() + +def delete_module(module_id, sqlcursor): + """ + Delete a module from the database. + """ + # Execute a SQL query to delete the module + sqlcursor.execute(""" + DELETE FROM master.STUDYBUDDY.Modules + WHERE module_id = ? + """, (module_id,)) + # Commit the transaction + sqlcursor.connection.commit() + +def get_learning_outcomes(class_id, selected_modules, sqlcursor): + """ + Get the learning outcomes for the selected modules. 
+ Returns a dictionary mapping module names to their learning outcomes and class information. + """ + # Execute a SQL query to get the learning outcomes for the selected modules + sqlcursor.execute(""" + SELECT module_name, LearningOutcomes + FROM master.STUDYBUDDY.Modules + WHERE class_id = ? + AND module_name IN ({}) + """.format(','.join('?' * len(selected_modules))), (class_id, *selected_modules)) + # Fetch all records from the query + learning_outcome_records = sqlcursor.fetchall() + # Create a dictionary mapping module names to their learning outcomes + learning_outcomes = {} + for record in learning_outcome_records: + module_name = record[0] + learning_outcome = record[1] + learning_outcomes[module_name] = learning_outcome + + # Execute a SQL query to get the class information + sqlcursor.execute(""" + SELECT class_name, LearningOutcomes + FROM master.STUDYBUDDY.Classes + WHERE class_id = ? + """, (class_id,)) + # Fetch the record from the query + class_record = sqlcursor.fetchone() + class_learning_outcomes = class_record[1] + + return learning_outcomes, class_learning_outcomes + def get_assignments(class_id, sqlcursor): """ diff --git a/Scripts/sessionvars.py b/Scripts/sessionvars.py index a0fdaed..e622005 100644 --- a/Scripts/sessionvars.py +++ b/Scripts/sessionvars.py @@ -12,9 +12,16 @@ """ Initialize session variables for the app """ - +from openai import OpenAI import streamlit as st +import os +from dotenv import load_dotenv, find_dotenv from Scripts import azsqldb +from random import randint +import uuid + + +load_dotenv(find_dotenv()) def initialize_session_vars(): ''' @@ -64,4 +71,74 @@ def initialize_session_vars(): #### # upload file vars if 'show_upload_file' not in st.session_state: - st.session_state.show_upload_file = False \ No newline at end of file + st.session_state.show_upload_file = False + + if 'show_upload_file2' not in st.session_state: + st.session_state.show_upload_file2 = False + + # Initialize the upload counter in session state 
+ if 'upload_key' not in st.session_state: + st.session_state.upload_key = str(randint(0, 1000000)) + + # Initialize the upload counter in session state + if 'upload_key_2' not in st.session_state: + st.session_state.upload_key_2 = str(randint(1000001, 10000000)) + + #### + # Module vars + # Store the selected class so the dashboard remains the same after navigating to other pages + if 'selected_module_name' not in st.session_state: + st.session_state.selected_module_name = None + # New module toggle (teacher) + if 'new_module_toggle' not in st.session_state: + st.session_state.new_module_toggle = False + # Delete module toggle (teacher) + if 'delete_module_toggle' not in st.session_state: + st.session_state.delete_module_toggle = False + # Store module information + if "module_info" not in st.session_state: + st.session_state.module_info = { + 'module_id': None, + 'module_name': None + } + # Store the selected modules for chatting + if 'selected_modules' not in st.session_state: + st.session_state.selected_modules = [] + + #### chat screen vars + if 'context_selection_toggle' not in st.session_state: + st.session_state.context_selection_toggle = True + + if 'blobs_df' not in st.session_state: + st.session_state.blobs_df = None + + if 'blobs_to_retrieve' not in st.session_state: + st.session_state.blobs_to_retrieve = None + + if 'ai_client' not in st.session_state: + st.session_state.ai_client = OpenAI( + api_key = os.getenv("OPENAI_API_KEY") + ) + if "session_id" not in st.session_state: # Used to identify each session + st.session_state.session_id = str(uuid.uuid4()) + + if "run" not in st.session_state: # Stores the run state of the assistant + st.session_state.run = {"status": None} + + if "messages" not in st.session_state: # Stores the messages of the assistant + st.session_state.messages = [] + + if "retry_error" not in st.session_state: # Used for error handling + st.session_state.retry_error = 0 + + if 'openai_fileids' not in st.session_state: + 
st.session_state.openai_fileids = [] + + if 'initialized' not in st.session_state: + st.session_state.initialized = False + + if 'cleanup' not in st.session_state: + st.session_state.cleanup = False + + if 'uploaded_to_openai' not in st.session_state: + st.session_state.uploaded_to_openai = False \ No newline at end of file diff --git a/app.py b/app.py index 1dc1aa3..a0ab5f6 100644 --- a/app.py +++ b/app.py @@ -1,9 +1,27 @@ -from Scripts import azsqldb, sessionvars, __login as lg, __sidebar as sb +from Scripts import azsqldb +from Scripts import sessionvars +from Scripts import __login as lg +from Scripts import __sidebar as sb +from Scripts import __chatscreen as cs +from Scripts import azblob as azb import streamlit as st from markdownlit import mdlit +import pandas as pd +import os +import time +from dotenv import load_dotenv, find_dotenv + +load_dotenv(find_dotenv()) sessionvars.initialize_session_vars() + +if st.session_state.cleanup == False: + print("Cleaning up files from OpenAI") + cs.delete_files_from_openai() + print("DONE") + st.session_state.cleanup = True + custom_width = 250 # Assuming the image is in the same directory as your script @@ -29,11 +47,147 @@ # If the user is logged in, display the chat screen -if st.session_state.user_info['user_id']: - st.markdown(mdlit("""This is where the chat screen will be displayed.""")) - +if st.session_state.user_info['user_id']: # Display the teacher sidebar if st.session_state.user_info['role'] == 'teacher': sb.teacher_sidebar() else: sb.student_sidebar() + + # Display the chat screen + if st.session_state.context_selection_toggle: + cs.context_selection() + + # block runs only after context has been selected + if st.session_state.selected_modules not in [None, []]: + col4, col5 = st.columns([1,1]) + + col4.write(f"Chatting about: {st.session_state.selected_modules}") + col5.write(f"Current session: {st.session_state.session_id}") + + # Get all the class and module files + 
azb.get_class_and_module_files(st.session_state.class_info['class_name']) + # Retrieve only the selected modules' files + st.session_state.blobs_to_retrieve = st.session_state.blobs_df[st.session_state.blobs_df['module_name'].isin(st.session_state.selected_modules + ['CLASS_LEVEL'])] + ######################### + #st.dataframe(st.session_state.blobs_to_retrieve) + # Store the openai file ids of all the files uploaded to the assistant + if st.session_state.uploaded_to_openai == False: # To ensure this only happens once + st.session_state.openai_fileids = cs.upload_files_ai(st.session_state.blobs_to_retrieve['full_path']) + st.session_state.uploaded_to_openai = True + # Initialize the assistant + if "studybuddy" not in st.session_state: + st.session_state.studybuddy = st.session_state.ai_client.beta.assistants.retrieve(os.getenv('OPENAI_ASSISTANT')) + st.session_state.studybuddy = st.session_state.ai_client.beta.assistants.update( + assistant_id=st.session_state.studybuddy.id, + file_ids=st.session_state.openai_fileids + ) + # Create a new thread for this session + st.session_state.thread = st.session_state.ai_client.beta.threads.create( + metadata={ + 'session_id': st.session_state.session_id, + } + ) + # If the run is completed, display the messages + elif hasattr(st.session_state.run, 'status') and st.session_state.run.status == "completed": + # Retrieve the list of messages + st.session_state.messages = st.session_state.ai_client.beta.threads.messages.list( + thread_id=st.session_state.thread.id + ) + # Display sources + for thread_message in st.session_state.messages.data: + for message_content in thread_message.content: + # Access the actual text content + message_content = message_content.text + annotations = message_content.annotations + citations = [] + + # Iterate over the annotations and add footnotes + for index, annotation in enumerate(annotations): + # Replace the text with a footnote + message_content.value = 
message_content.value.replace(annotation.text, f' [{index}]') + + # Gather citations based on annotation attributes + if (file_citation := getattr(annotation, 'file_citation', None)): + cited_file = st.session_state.ai_client.files.retrieve(file_citation.file_id) + citations.append(f'[{index}] {file_citation.quote} from {cited_file.filename}') + elif (file_path := getattr(annotation, 'file_path', None)): + cited_file = st.session_state.ai_client.files.retrieve(file_path.file_id) + citations.append(f'[{index}] Click to download {cited_file.filename}') + # Note: File download functionality not implemented above for brevity + + # Add footnotes to the end of the message before displaying to user + message_content.value += '\n' + '\n'.join(citations) + # Display messages + for message in reversed(st.session_state.messages.data): + if message.role in ["user", "assistant"]: + for content_part in message.content: + message_text = content_part.text.value + # Check if the message contains the specified phrase + if " INITIAL PROMPT " not in message_text: + with st.chat_message(message.role): + st.markdown(message_text) + else: + # Optionally, you can print a message to the console for debugging + print("Skipped a message containing the initial prompt info.") + + if st.session_state.initialized == False: + prompt = cs.initialize_chat() + st.session_state.initialized = True + else: + prompt = st.chat_input("How can I help you?") + + if prompt: + with st.chat_message('user'): + st.write(prompt) + + # Add message to the thread + st.session_state.messages = st.session_state.ai_client.beta.threads.messages.create( + thread_id=st.session_state.thread.id, + role="user", + content=prompt + ) + # Do a run to process the messages in the thread + st.session_state.run = st.session_state.ai_client.beta.threads.runs.create( + thread_id=st.session_state.thread.id, + assistant_id=st.session_state.studybuddy.id, + ) + print(f"Current run id: {st.session_state.run.id}") + if 
st.session_state.retry_error < 3: + time.sleep(1) # Wait 1 second before checking run status + st.rerun() + # Check if 'run' object has 'status' attribute + if hasattr(st.session_state.run, 'status'): + # Handle the 'running' status + if st.session_state.run.status == "running": + with st.chat_message('assistant'): + st.write("Thinking ......") + if st.session_state.retry_error < 3: + time.sleep(2) # Short delay to prevent immediate rerun, adjust as needed + st.rerun() + + # Handle the 'failed' status + elif st.session_state.run.status == "failed": + st.session_state.retry_error += 1 + with st.chat_message('assistant'): + if st.session_state.retry_error < 3: + st.write("Run failed, retrying ......") + time.sleep(5) # Longer delay before retrying + st.rerun() + else: + st.error("FAILED: The OpenAI API is currently processing too many requests. Please try again later ......") + + # Handle any status that is not 'completed' + elif st.session_state.run.status != "completed": + print("""# Handle any status that is not 'completed' + elif st.session_state.run.status != "completed":""") + print(f"Current run status: {st.session_state.run.status}") + print(f"Current run id: {st.session_state.run.id}") + # Attempt to retrieve the run again, possibly redundant if there's no other status but 'running' or 'failed' + st.session_state.run = st.session_state.ai_client.beta.threads.runs.retrieve( + thread_id=st.session_state.thread.id, + run_id=st.session_state.run.id, + ) + if st.session_state.retry_error < 3: + time.sleep(2) + st.rerun() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 5b88804..65cdebb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,10 @@ -openai -langchain -faiss-cpu -pypdf -tiktoken -python-dotenv -pyodbc -streamlit -wget +openai==1.3.7 +python-dotenv==1.0.0 +pyodbc==5.0.1 +streamlit==1.28.1 +wget==3.2 azure-search-documents==11.4.0b8 -azure-identity -PyPDF2 -re -io -markdownlit \ No newline at end of file 
# temp.ipynb utility cells: sync Azure Blob files into the OpenAI Files API
# (purpose="assistants") and clean them up again.

import json
import os


def file_object_to_dict(file_obj):
    """Convert an OpenAI FileObject into a plain JSON-serializable dict.

    Keeps exactly the fields the project persists in .bin/files.json,
    copied verbatim from the API object.
    """
    return {
        'id': file_obj.id,
        'bytes': file_obj.bytes,
        'created_at': file_obj.created_at,
        'filename': file_obj.filename,
        'object': file_obj.object,
        'purpose': file_obj.purpose,
        'status': file_obj.status,
        'status_details': file_obj.status_details,
    }


def stage_and_upload_blobs(container_client, client, blob_paths,
                           staging_dir='./staging',
                           json_path='./.bin/files.json'):
    """Download each Azure blob into *staging_dir*, upload it to OpenAI,
    record the uploads in *json_path*, and remove the staged copy.

    BUG FIX: the notebook hard-coded ``staging_dir = '/staging'`` — an
    absolute path at the filesystem root.  The repo's own ``staging/``
    directory shows a relative path was intended, so the default is now
    ``'./staging'`` and both paths are parameters.

    :param container_client: azure.storage.blob ContainerClient
    :param client: openai.OpenAI client
    :param blob_paths: iterable of blob names to transfer
    :return: list of FileObject responses from the OpenAI Files API
    """
    os.makedirs(staging_dir, exist_ok=True)
    uploaded_files = []
    for blob_path in blob_paths:
        # The staging file is only a transfer buffer: blob -> disk -> OpenAI.
        staging_path = os.path.join(staging_dir, os.path.basename(blob_path))
        blob_client = container_client.get_blob_client(blob_path)
        with open(staging_path, "wb") as download_file:
            download_file.write(blob_client.download_blob().readall())
        with open(staging_path, "rb") as file:
            uploaded_files.append(
                client.files.create(file=file, purpose="assistants")
            )
        os.remove(staging_path)

    # Persist the upload metadata so other scripts can look up file ids.
    with open(json_path, 'w') as json_file:
        json.dump([file_object_to_dict(f) for f in uploaded_files], json_file)
    return uploaded_files


def delete_files_from_openai(client, assistant_id=None):
    """Delete every file on the OpenAI account, then every file attached to
    the assistant (id taken from the OPENAI_ASSISTANT env var by default).

    Prints one line per deleted file and ``ending..`` when a listing is
    empty, mirroring the original notebook cell's output.
    """
    assistant_id = assistant_id or os.getenv("OPENAI_ASSISTANT")

    files = client.files.list()
    if not files.data:
        print("ending..")
        return  # nothing to delete at the account level
    for file in files.data:
        client.files.delete(file_id=file.id)
        print(f"Deleted file {file.id}")

    files = client.beta.assistants.files.list(assistant_id=assistant_id)
    if not files.data:
        print("ending..")
        return  # nothing attached to the assistant
    for file in files.data:
        client.beta.assistants.files.delete(
            assistant_id=assistant_id,
            file_id=file.id,
        )
# testing_chat.py helpers: citation footnotes, prompt submission, and the
# run-status polling loop for the Streamlit chat page.

import time


def add_citations(message_content, retrieve_file):
    """Rewrite assistant annotations in *message_content* into numbered
    footnotes and append the citation list to the text.

    Each annotation's source span is replaced with `` [index]``; cited
    files are resolved through *retrieve_file* (e.g.
    ``client.files.retrieve``) to show the filename.

    :param message_content: object with ``.value`` (str) and
        ``.annotations`` (mutated in place, as in the original script)
    :param retrieve_file: callable(file_id) -> object with ``.filename``
    :return: the rewritten text, footnotes included
    """
    citations = []
    for index, annotation in enumerate(message_content.annotations):
        # Replace the annotated span with a footnote marker.
        message_content.value = message_content.value.replace(
            annotation.text, f' [{index}]'
        )
        if (file_citation := getattr(annotation, 'file_citation', None)):
            cited_file = retrieve_file(file_citation.file_id)
            citations.append(
                f'[{index}] {file_citation.quote} from {cited_file.filename}'
            )
        elif (file_path := getattr(annotation, 'file_path', None)):
            cited_file = retrieve_file(file_path.file_id)
            # NOTE(review): download link not implemented, matching original.
            citations.append(f'[{index}] Click to download {cited_file.filename}')
    message_content.value += '\n' + '\n'.join(citations)
    return message_content.value


def handle_prompt(st, client):
    """Accept a new user prompt, append it to the thread, and start a run.

    BUG FIX: ``st.session_state.retry_error`` was read before it was ever
    initialized anywhere in the script; it is now seeded to 0 on first use.
    BUG FIX: the original assigned the created ``Message`` object to
    ``st.session_state.messages``, clobbering the messages *page* whose
    ``.data`` the display code iterates; the session key is left untouched.
    """
    st.session_state.setdefault('retry_error', 0)
    prompt = st.chat_input("How can I help you?")
    if not prompt:
        return
    with st.chat_message('user'):
        st.write(prompt)
    client.beta.threads.messages.create(
        thread_id=st.session_state.thread.id,
        role="user",
        content=prompt,
    )
    st.session_state.run = client.beta.threads.runs.create(
        thread_id=st.session_state.thread.id,
        assistant_id=st.session_state.assistant.id,
    )
    if st.session_state.retry_error < 3:
        time.sleep(1)  # brief pause before re-running to check status
        st.rerun()


def poll_run(st, client):
    """Advance the page according to the current run's status.

    BUG FIX: the Assistants API reports ``queued``/``in_progress`` while a
    run executes — never ``running`` — so the original "Thinking" branch was
    dead code and every in-flight run fell through to the generic retrieve
    branch.  TODO confirm against the pinned openai==1.3.7 client.
    """
    if not hasattr(st.session_state.run, 'status'):
        return
    status = st.session_state.run.status

    if status in ("queued", "in_progress"):
        with st.chat_message('assistant'):
            st.write("Thinking ......")
        if st.session_state.retry_error < 3:
            time.sleep(1)  # short delay to prevent immediate rerun
            st.rerun()
    elif status == "failed":
        st.session_state.retry_error += 1
        with st.chat_message('assistant'):
            if st.session_state.retry_error < 3:
                st.write("Run failed, retrying ......")
                time.sleep(3)  # longer delay before retrying
                st.rerun()
            else:
                st.error("FAILED: The OpenAI API is currently processing too many requests. Please try again later ......")
    elif status != "completed":
        # Unknown/transitional status: refresh the run object and poll again.
        st.session_state.run = client.beta.threads.runs.retrieve(
            thread_id=st.session_state.thread.id,
            run_id=st.session_state.run.id,
        )
        if st.session_state.retry_error < 3:
            time.sleep(3)
            st.rerun()