diff --git a/.github/workflows/dev-ci-cd.yaml b/.github/workflows/dev-ci-cd.yaml index e1407015..0bde2784 100644 --- a/.github/workflows/dev-ci-cd.yaml +++ b/.github/workflows/dev-ci-cd.yaml @@ -75,6 +75,9 @@ jobs: touch ${{ github.workspace }}/deploy/dev/secrets/hf_token.txt echo "${{ secrets.HF_TOKEN }}" >> ${{ github.workspace }}/deploy/dev/secrets/hf_token.txt chmod 400 ${{ github.workspace }}/deploy/dev/secrets/hf_token.txt + touch ${{ github.workspace }}/deploy/dev/secrets/pg_password.txt + echo "${{ secrets.DEV_PG_PASSWORD }}" >> ${{ github.workspace }}/deploy/dev/secrets/pg_password.txt + chmod 400 ${{ github.workspace }}/deploy/dev/secrets/pg_password.txt # create env file to set tag(s) for docker-compose - name: Create Env File @@ -87,12 +90,12 @@ jobs: # stop any existing docker compose that's running - name: Stop Docker Compose run: | - ssh submit-t3desk 'bash -s' < ${{ github.workspace }}/deploy/dev/dev-stop.sh + ssh submit06 'bash -s' < ${{ github.workspace }}/deploy/dev/dev-stop.sh # copy repository to machine - name: Copy Repository run: | - rsync -e ssh -r ${{ github.workspace}}/* --exclude .git/ --delete submit-t3desk:~/A2rchi-dev/ + rsync -e ssh -r ${{ github.workspace}}/* --exclude .git/ --delete submit06:~/A2rchi-dev/ # run deploy script - name: Run Deploy Script @@ -100,18 +103,12 @@ jobs: export tag="${GITHUB_REF#refs/heads/}" export tag="${tag//\//-}.${GITHUB_SHA}" sed -i "s/BASE_TAG/${tag}/" ${{ github.workspace }}/deploy/dev/dev-install.sh - ssh submit-t3desk 'bash -s' < ${{ github.workspace }}/deploy/dev/dev-install.sh + ssh submit06 'bash -s' < ${{ github.workspace }}/deploy/dev/dev-install.sh # clean up secret files - name: Remove Secrets from Runner run: | - rm ${{ github.workspace }}/deploy/dev/secrets/cleo_*.txt - rm ${{ github.workspace }}/deploy/dev/secrets/imap_*.txt - rm ${{ github.workspace }}/deploy/dev/secrets/sender_*.txt - rm ${{ github.workspace }}/deploy/dev/secrets/flask_uploader_app_secret_key.txt - rm ${{ github.workspace }}/deploy/dev/secrets/uploader_salt.txt - rm ${{ github.workspace }}/deploy/dev/secrets/openai_api_key.txt - rm ${{ github.workspace }}/deploy/dev/secrets/hf_token.txt + rm ${{ github.workspace }}/deploy/dev/secrets/*.txt # print job status - run: echo "🍏 This job's status is ${{ job.status }}." diff --git a/.github/workflows/prod-801-ci-cd.yaml b/.github/workflows/prod-801-ci-cd.yaml index 49b6affa..9d9429b6 100644 --- a/.github/workflows/prod-801-ci-cd.yaml +++ b/.github/workflows/prod-801-ci-cd.yaml @@ -42,6 +42,9 @@ jobs: touch ${{ github.workspace }}/deploy/prod-801/secrets/hf_token.txt echo "${{ secrets.HF_TOKEN }}" >> ${{ github.workspace }}/deploy/prod-801/secrets/hf_token.txt chmod 400 ${{ github.workspace }}/deploy/prod-801/secrets/hf_token.txt + touch ${{ github.workspace }}/deploy/prod-801/secrets/pg_password.txt + echo "${{ secrets.PROD_801_PG_PASSWORD }}" >> ${{ github.workspace }}/deploy/prod-801/secrets/pg_password.txt + chmod 400 ${{ github.workspace }}/deploy/prod-801/secrets/pg_password.txt # create env file to set tag(s) for docker-compose - name: Create Env File @@ -72,10 +75,7 @@ jobs: # clean up secret files - name: Remove Secrets from Runner run: | - rm ${{ github.workspace }}/deploy/prod-801/secrets/flask_uploader_app_secret_key.txt - rm ${{ github.workspace }}/deploy/prod-801/secrets/uploader_salt.txt - rm ${{ github.workspace }}/deploy/prod-801/secrets/openai_api_key.txt - rm ${{ github.workspace }}/deploy/prod-801/secrets/hf_token.txt + rm ${{ github.workspace }}/deploy/prod-801/secrets/*.txt # print job status - run: echo "🍏 This job's status is ${{ job.status }}." \ No newline at end of file diff --git a/.github/workflows/prod-ci-cd.yaml b/.github/workflows/prod-ci-cd.yaml index eafcadff..ad035343 100644 --- a/.github/workflows/prod-ci-cd.yaml +++ b/.github/workflows/prod-ci-cd.yaml @@ -75,6 +75,9 @@ jobs: touch ${{ github.workspace }}/deploy/prod/secrets/hf_token.txt echo "${{ secrets.HF_TOKEN }}" >> ${{ github.workspace }}/deploy/prod/secrets/hf_token.txt chmod 400 ${{ github.workspace }}/deploy/prod/secrets/hf_token.txt + touch ${{ github.workspace }}/deploy/prod/secrets/pg_password.txt + echo "${{ secrets.PROD_PG_PASSWORD }}" >> ${{ github.workspace }}/deploy/prod/secrets/pg_password.txt + chmod 400 ${{ github.workspace }}/deploy/prod/secrets/pg_password.txt # create env file to set tag(s) for docker-compose - name: Create Env File @@ -105,13 +108,7 @@ jobs: # clean up secret files - name: Remove Secrets from Runner run: | - rm ${{ github.workspace }}/deploy/prod/secrets/cleo_*.txt - rm ${{ github.workspace }}/deploy/prod/secrets/imap_*.txt - rm ${{ github.workspace }}/deploy/prod/secrets/sender_*.txt - rm ${{ github.workspace }}/deploy/prod/secrets/flask_uploader_app_secret_key.txt - rm ${{ github.workspace }}/deploy/prod/secrets/uploader_salt.txt - rm ${{ github.workspace }}/deploy/prod/secrets/openai_api_key.txt - rm ${{ github.workspace }}/deploy/prod/secrets/hf_token.txt + rm ${{ github.workspace }}/deploy/prod/secrets/*.txt # print job status - run: echo "🍏 This job's status is ${{ job.status }}." diff --git a/.github/workflows/prod-root-ci-cd.yaml b/.github/workflows/prod-root-ci-cd.yaml index 154ca05a..ba4d87e6 100644 --- a/.github/workflows/prod-root-ci-cd.yaml +++ b/.github/workflows/prod-root-ci-cd.yaml @@ -42,6 +42,9 @@ jobs: touch ${{ github.workspace }}/deploy/prod-root/secrets/hf_token.txt echo "${{ secrets.HF_TOKEN }}" >> ${{ github.workspace }}/deploy/prod-root/secrets/hf_token.txt chmod 400 ${{ github.workspace }}/deploy/prod-root/secrets/hf_token.txt + touch ${{ github.workspace }}/deploy/prod-root/secrets/pg_password.txt + echo "${{ secrets.PROD_ROOT_PG_PASSWORD }}" >> ${{ github.workspace }}/deploy/prod-root/secrets/pg_password.txt + chmod 400 ${{ github.workspace }}/deploy/prod-root/secrets/pg_password.txt # create env file to set tag(s) for docker-compose - name: Create Env File @@ -72,10 +75,7 @@ jobs: # clean up secret files - name: Remove Secrets from Runner run: | - rm ${{ github.workspace }}/deploy/prod-root/secrets/flask_uploader_app_secret_key.txt - rm ${{ github.workspace }}/deploy/prod-root/secrets/uploader_salt.txt - rm ${{ github.workspace }}/deploy/prod-root/secrets/openai_api_key.txt - rm ${{ github.workspace }}/deploy/prod-root/secrets/hf_token.txt + rm ${{ github.workspace }}/deploy/prod-root/secrets/*.txt # print job status - run: echo "🍏 This job's status is ${{ job.status }}." \ No newline at end of file diff --git a/.gitignore b/.gitignore index bdd9ff66..2eaa0d0e 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ venv *sqlite_db .vscode 801-content/ +logos/ diff --git a/A2rchi/interfaces/chat_app/app.py b/A2rchi/interfaces/chat_app/app.py index d70bfbd4..a24330c9 100644 --- a/A2rchi/interfaces/chat_app/app.py +++ b/A2rchi/interfaces/chat_app/app.py @@ -1,29 +1,87 @@ from A2rchi.chains.chain import Chain from A2rchi.utils.config_loader import Config_Loader from A2rchi.utils.data_manager import DataManager +from A2rchi.utils.env import read_secret +from A2rchi.utils.sql import SQL_INSERT_CONVO, SQL_INSERT_FEEDBACK, SQL_QUERY_CONVO +from datetime import datetime from pygments import highlight -from pygments.lexers import BashLexer,PythonLexer,JavaLexer,JavascriptLexer,BashLexer,CppLexer,CLexer,TypeScriptLexer,HtmlLexer,FortranLexer,JuliaLexer,MathematicaLexer,MatlabLexer +from pygments.lexers import ( + BashLexer, + PythonLexer, + JavaLexer, + JavascriptLexer, + CppLexer, + CLexer, + TypeScriptLexer, + HtmlLexer, + FortranLexer, + JuliaLexer, + MathematicaLexer, + MatlabLexer +) from pygments.formatters import HtmlFormatter from flask import request, jsonify, render_template from flask_cors import CORS from threading import Lock -from typing import Optional, List, Tuple +from typing import List -import numpy as np - -import json -import re import mistune as mt +import numpy as np import os +import psycopg2 +import psycopg2.extras import yaml -import time # DEFINITIONS -# TODO: remove this logic and eventually replace with per-user (or per-conversation) rate limits -QUERY_LIMIT = 10000 # max number of queries +QUERY_LIMIT = 10000 # max number of queries per conversation + + +class AnswerRenderer(mt.HTMLRenderer): + """ + Class for custom rendering of A2rchi output. Child of mistune's HTMLRenderer, with custom overrides. + Code blocks are structured and colored according to pygment lexers + """ + RENDERING_LEXER_MAPPING = { + "python": PythonLexer, + "java": JavaLexer, + "javascript": JavascriptLexer, + "bash": BashLexer, + "c++": CppLexer, + "cpp": CppLexer, + "c": CLexer, + "typescript": TypeScriptLexer, + "html": HtmlLexer, + "Fortran" : FortranLexer, + "Julia" : JuliaLexer, + "Mathematica" : MathematicaLexer, + "MATLAB": MatlabLexer + } + + def __init__(self): + super().__init__() + + def block_text(self,text): + #Handle blocks of text (the negatives of blocks of code) and sets them in paragraphs + return f"""

{text}

""" + + def block_code(self, code, info=None): + # Handle code blocks (triple backticks) + if info not in self.RENDERING_LEXER_MAPPING.keys(): info = 'bash' #defaults in bash + code_block_highlighted = highlight(code.strip(), self.RENDERING_LEXER_MAPPING[info](stripall=True), HtmlFormatter()) + return f"""
+
+ {info} +
+
{code_block_highlighted} +
+
""" + + def codespan(self, text): + # Handle inline code snippets (single backticks) + return f"""{text}""" class ChatWrapper: @@ -34,12 +92,22 @@ def __init__(self): # load configs self.config = Config_Loader().config self.global_config = self.config["global"] + self.utils_config = self.config["utils"] self.data_path = self.global_config["DATA_PATH"] # initialize data manager self.data_manager = DataManager() self.data_manager.update_vectorstore() + # store postgres connection info + self.pg_config = { + "password": read_secret("POSTGRES_PASSWORD"), + **self.utils_config["postgres"], + } + self.conn = None + self.cursor = None + + # initialize lock and chain self.lock = Lock() self.chain = Chain() self.number_of_queries = 0 @@ -57,57 +125,6 @@ def convert_to_app_history(history): return [list(entry) for entry in history] - @staticmethod - def convert_to_chain_history(history): - """ - Input: the history in the form of a list of lists, where the first entry of each tuple is - the author of the text and the second entry is the text itself - - Output: the history in the form of a list of tuples, where the first entry of each tuple is - the author of the text and the second entry is the text itself (native A2rchi history format) - """ - return [tuple(entry) for entry in history] - - - @staticmethod - def update_or_add_discussion(data_path, json_file, discussion_id, discussion_contents = None, discussion_feedback = None): - print(" INFO - entered update_or_add_discussion.") - - # read the existing JSON data from the file - data = {} - try: - with open(os.path.join(data_path, json_file), 'r') as f: - data = json.load(f) - print(" INFO - json_file found.") - - except FileNotFoundError: - # create data path if it doesn't exist - print(" ERROR - json_file not found. Creating a new one") - os.makedirs(data_path, exist_ok=True) - - # update or add discussion - discussion_dict = data.get(str(discussion_id), {}) - - discussion_dict["meta"] = discussion_dict.get("meta", {}) - if str(discussion_id) not in data.keys(): #first time in discusssion - discussion_dict["meta"]["time_first_used"] = time.time() - discussion_dict["meta"]["time_last_used"] = time.time() - - if discussion_contents is not None: - print(" INFO - found contents.") - discussion_dict["contents"] = discussion_contents - discussion_dict["meta"]["times_chain_was_called"] = discussion_dict["meta"]["times_chain_was_called"] + [time.time()] if ("times_chain_was_called" in discussion_dict["meta"].keys()) else [time.time()] - if discussion_feedback is not None: - print(" INFO - found feedback.") - discussion_dict["feedback"] = discussion_dict["feedback"] + [discussion_feedback] if ("feedback" in discussion_dict.keys() and isinstance(discussion_dict["feedback"], List)) else [discussion_feedback] - - data[str(discussion_id)] = discussion_dict - - # write the updated JSON data back to the file - with open(os.path.join(data_path, json_file), 'w') as f: - json.dump(data, f) - - @staticmethod def format_code_in_text(text): """ @@ -123,7 +140,92 @@ def format_code_in_text(text): return text - def __call__(self, history: Optional[List[Tuple[str, str]]], discussion_id: Optional[int]): + def insert_feedback(self, feedback): + """ + """ + # construct insert_tup (mid, feedback_ts, feedback, feedback_msg, incorrect, unhelpful, inappropriate) + insert_tup = ( + feedback['message_id'], + feedback['feedback_ts'], + feedback['feedback'], + feedback['feedback_msg'], + feedback['incorrect'], + feedback['unhelpful'], + feedback['inappropriate'], + ) + + # create connection to database + self.conn = psycopg2.connect(**self.pg_config) + self.cursor = self.conn.cursor() + self.cursor.execute(SQL_INSERT_FEEDBACK, insert_tup) + self.conn.commit() + + # clean up database connection state + self.cursor.close() + self.conn.close() + self.cursor, self.conn = None, None + + + def query_conversation_history(self, conversation_id): + """ + Return the conversation history as an ordered list of tuples. The order + is determined by ascending message_id. Each tuple contains the sender and + the message content + """ + # create connection to database + self.conn = psycopg2.connect(**self.pg_config) + self.cursor = self.conn.cursor() + + # query conversation history + self.cursor.execute(SQL_QUERY_CONVO, (conversation_id,)) + history = self.cursor.fetchall() + + # clean up database connection state + self.cursor.close() + self.conn.close() + self.cursor, self.conn = None, None + + return history + + + def insert_conversation(self, conversation_id, user_message, a2rchi_message, is_refresh=False) -> List[int]: + """ + """ + print(" INFO - entered insert_conversation.") + + # parse user message / a2rchi message if not None + user_sender, user_content, user_msg_ts = user_message + a2rchi_sender, a2rchi_content, a2rchi_msg_ts = a2rchi_message + + # construct insert_tups + insert_tups = ( + [ + # (conversation_id, sender, content, ts) + (conversation_id, user_sender, user_content, user_msg_ts), + (conversation_id, a2rchi_sender, a2rchi_content, a2rchi_msg_ts), + ] + if not is_refresh + else [ + (conversation_id, a2rchi_sender, a2rchi_content, a2rchi_msg_ts), + ] + ) + + # create connection to database + self.conn = psycopg2.connect(**self.pg_config) + self.cursor = self.conn.cursor() + psycopg2.extras.execute_values(self.cursor, SQL_INSERT_CONVO, insert_tups) + self.conn.commit() + message_ids = list(map(lambda tup: tup[0], self.cursor.fetchall())) + + # clean up database connection state + self.cursor.close() + self.conn.close() + self.cursor, self.conn = None, None + + return message_ids + + + def __call__(self, message: List[str], conversation_id: int, is_refresh: bool, msg_ts: datetime): """ Execute the chat functionality. """ @@ -141,64 +243,78 @@ def __call__(self, history: Optional[List[Tuple[str, str]]], discussion_id: Opti self.lock.release() print("INFO - released lock file update vectorstore") - # convert the history to native A2rchi form (because javascript does not have tuples) - history = self.convert_to_chain_history(history) - - # get discussion ID so that the conversation can be saved (It seems that random is no good... TODO) - discussion_id = discussion_id or np.random.randint(100000, 999999) - - # run chain to get result - if self.number_of_queries < QUERY_LIMIT: - result = self.chain(history) - else: - # the case where we have exceeded the QUERY LIMIT (built so that we do not overuse the chain) - output = "Sorry, our service is currently down due to exceptional demand. Please come again later." - return output, discussion_id - self.number_of_queries += 1 - print(f"number of queries is: {self.number_of_queries}") - - # get similarity score to see how close the input is to the source - # - low score means very close (it's a distance between embedding vectors approximated - # by an approximate k-nearest neighbors algorithm called HNSW) - inp = history[-1][1] - score = self.chain.similarity_search(inp) - - # load the present list of sources try: - with open(os.path.join(self.data_path, 'sources.yml'), 'r') as file: - sources = yaml.load(file, Loader=yaml.FullLoader) - except FileNotFoundError: - sources = dict() - - # get the closest source to the document - source = None - if len(result['source_documents']) > 0: - source_hash = result['source_documents'][0].metadata['source'] - if '/' in source_hash and '.' in source_hash: - source = source_hash.split('/')[-1].split('.')[0] - - # if the score is low enough, include the source as a link, otherwise give just the answer - embedding_name = self.config["utils"]["embeddings"]["EMBEDDING_NAME"] - similarity_score_reference = self.config["utils"]["embeddings"]["EMBEDDING_CLASS_MAP"][embedding_name]["similarity_score_reference"] - if score < similarity_score_reference and source in sources.keys(): - output = "

" + result["answer"] + "

" + "\n\n

Click here to read more

" - else: - output = "

" + result["answer"] + "

" - - self.lock.acquire() - try: - print("INFO - acquired lock file write json") - - ChatWrapper.update_or_add_discussion(self.data_path, "conversations_test.json", discussion_id, discussion_contents = history + [("A2rchi", output)]) + # convert the message to native A2rchi form (because javascript does not have tuples) + sender, content = tuple(message[0]) + + # TODO: incr. from 0? + # get discussion ID so that the conversation can be saved (It seems that random is no good... TODO) + conversation_id = conversation_id or np.random.randint(100000, 999999) + + # fetch history given conversation_id + history = self.query_conversation_history(conversation_id) + + # if this is a chat refresh / message regeneration; remove previous contiguous non-A2rchi message(s) + if is_refresh: + while history[-1][0] == "A2rchi": + _ = history.pop(-1) + + # run chain to get result; limit users to 1000 queries per conversation; refreshing browser starts new conversation + if len(history) < QUERY_LIMIT: + full_history = history + [(sender, content)] if not is_refresh else history + result = self.chain(full_history) + else: + # the case where we have exceeded the QUERY LIMIT (built so that we do not overuse the chain) + output = "Sorry, our service is currently down due to exceptional demand. Please come again later." + return output, conversation_id + + # keep track of total number of queries and log this amount + self.number_of_queries += 1 + print(f"number of queries is: {self.number_of_queries}") + + # get similarity score to see how close the input is to the source + # - low score means very close (it's a distance between embedding vectors approximated + # by an approximate k-nearest neighbors algorithm called HNSW) + score = self.chain.similarity_search(content) + + # load the present list of sources + try: + with open(os.path.join(self.data_path, 'sources.yml'), 'r') as file: + sources = yaml.load(file, Loader=yaml.FullLoader) + except FileNotFoundError: + sources = dict() + + # get the closest source to the document + source = None + if len(result['source_documents']) > 0: + source_hash = result['source_documents'][0].metadata['source'] + if '/' in source_hash and '.' in source_hash: + source = source_hash.split('/')[-1].split('.')[0] + + # if the score is low enough, include the source as a link, otherwise give just the answer + embedding_name = self.config["utils"]["embeddings"]["EMBEDDING_NAME"] + similarity_score_reference = self.config["utils"]["embeddings"]["EMBEDDING_CLASS_MAP"][embedding_name]["similarity_score_reference"] + if score < similarity_score_reference and source in sources.keys(): + output = "

" + self.format_code_in_text(result["answer"]) + "

" + "\n\n

Click here to read more

" + else: + output = "

" + self.format_code_in_text(result["answer"]) + "

" + + # write user message and A2rchi response to database + user_message = (sender, content, msg_ts) + a2rchi_message = ("A2rchi", output, datetime.now()) + + message_ids = self.insert_conversation(conversation_id, user_message, a2rchi_message, is_refresh) except Exception as e: print(f"ERROR - {str(e)}") finally: - self.lock.release() - print("INFO - released lock file write json") - - return output, discussion_id + if self.cursor is not None: + self.cursor.close() + if self.conn is not None: + self.conn.close() + + return output, conversation_id, message_ids class FlaskAppWrapper(object): @@ -239,31 +355,34 @@ def get_chat_response(self): functionality is carried through by javascript and html. Input is a requestion with - Discussion_id: Either None or an integer - Conversation: List of length 2 lists, where the length 2 - lists have first element either "User" or - "A2rchi" and have second element of a message - content. + conversation_id: Either None or an integer + last_message: list of length 2, where the first element is "User" + and the second element contains their message. Returns: A json with a response (html formatted plain text string) and a discussion ID (either None or an integer) """ - history = request.json.get('conversation') # get user input from the request - discussion_id = request.json.get('discussion_id') # get discussion_id from the request + # compute timestamp at which message was received by server + msg_ts = datetime.now() - # query the chat and return the results. + # get user input and conversation_id from the request + message = request.json.get('last_message') + conversation_id = request.json.get('conversation_id') + is_refresh = request.json.get('is_refresh') + + # query the chat and return the results. print(" INFO - Calling the ChatWrapper()") - response, discussion_id = self.chat(history, discussion_id) + response, conversation_id, message_ids = self.chat(message, conversation_id, is_refresh, msg_ts) - return jsonify({'response': response, 'discussion_id': discussion_id}) + return jsonify({'response': response, 'conversation_id': conversation_id, 'a2rchi_msg_id': message_ids[-1]}) def index(self): return render_template('index.html') - + def terms(self): return render_template('terms.html') - + def like(self): self.chat.lock.acquire() print("INFO - acquired lock file") @@ -272,21 +391,24 @@ def like(self): data = request.json # Extract the HTML content and any other data you need - chat_content = data.get('content') - discussion_id = data.get('discussion_id') message_id = data.get('message_id') feedback = { - "chat_content" : chat_content, - "message_id" : message_id, - "feedback" : "like", + "message_id" : message_id, + "feedback" : "like", + "feedback_ts" : datetime.now(), + "feedback_msg" : None, + "incorrect" : None, + "unhelpful" : None, + "inappropriate": None, } - ChatWrapper.update_or_add_discussion(self.data_path, "conversations_test.json", discussion_id, discussion_feedback = feedback) + self.chat.insert_feedback(feedback) - response = {'message': 'Liked', 'content': chat_content} + response = {'message': 'Liked'} return jsonify(response), 200 except Exception as e: + print(f"ERROR: {str(e)}") return jsonify({'error': str(e)}), 500 # According to the Python documentation: https://docs.python.org/3/tutorial/errors.html#defining-clean-up-actions @@ -295,6 +417,11 @@ def like(self): self.chat.lock.release() print("INFO - released lock file") + if self.chat.cursor is not None: + self.chat.cursor.close() + if self.chat.conn is not None: + self.chat.conn.close() + def dislike(self): self.chat.lock.acquire() print("INFO - acquired lock file") @@ -303,29 +430,28 @@ def dislike(self): data = request.json # Extract the HTML content and any other data you need - chat_content = data.get('content') - discussion_id = data.get('discussion_id') message_id = data.get('message_id') - message = data.get('message') + feedback_msg = data.get('feedback_msg') incorrect = data.get('incorrect') unhelpful = data.get('unhelpful') inappropriate = data.get('inappropriate') feedback = { - "chat_content" : chat_content, - "message_id" : message_id, - "feedback" : "dislike", - "message" : message, - "incorrect" : incorrect, - "unhelpful" : unhelpful, - "inappropriate": inappropriate, + "message_id" : message_id, + "feedback" : "dislike", + "feedback_ts" : datetime.now(), + "feedback_msg" : feedback_msg, + "incorrect" : incorrect, + "unhelpful" : unhelpful, + "inappropriate": inappropriate, } - ChatWrapper.update_or_add_discussion(self.data_path, "conversations_test.json", discussion_id, discussion_feedback = feedback) + self.chat.insert_feedback(feedback) - response = {'message': 'Disliked', 'content': chat_content} + response = {'message': 'Disliked'} return jsonify(response), 200 except Exception as e: + print(f"ERROR: {str(e)}") return jsonify({'error': str(e)}), 500 # According to the Python documentation: https://docs.python.org/3/tutorial/errors.html#defining-clean-up-actions @@ -334,48 +460,7 @@ def dislike(self): self.chat.lock.release() print("INFO - released lock file") - -class AnswerRenderer(mt.HTMLRenderer): - """ - Class for custom rendering of A2rchi output. Child of mistune's HTMLRenderer, with custom overrides. - Code blocks are structured and colored according to pygment lexers - """ - RENDERING_LEXER_MAPPING = { - "python": PythonLexer, - "java": JavaLexer, - "javascript": JavascriptLexer, - "bash": BashLexer, - "c++": CppLexer, - "cpp": CppLexer, - "c": CLexer, - "typescript": TypeScriptLexer, - "html": HtmlLexer, - "Fortran" : FortranLexer, - "Julia" : JuliaLexer, - "Mathematica" : MathematicaLexer, - "MATLAB": MatlabLexer - } - - def __init__(self): - super().__init__() - - def block_text(self,text): - #Handle blocks of text (the negatives of blocks of code) and sets them in paragraphs - return f"""

{text}

""" - - def block_code(self, code, info=None): - # Handle code blocks (triple backticks) - if info not in self.RENDERING_LEXER_MAPPING.keys(): info = 'bash' #defaults in bash - code_block_highlighted = highlight(code.strip(), self.RENDERING_LEXER_MAPPING[info](stripall=True), HtmlFormatter()) - return f"""
-
- {info} -
-
{code_block_highlighted} -
-
""" - - def codespan(self, text): - # Handle inline code snippets (single backticks) - return f"""{text}""" - + if self.chat.cursor is not None: + self.chat.cursor.close() + if self.chat.conn is not None: + self.chat.conn.close() diff --git a/A2rchi/interfaces/chat_app/static/images/a2rchi-initial.png b/A2rchi/interfaces/chat_app/static/images/a2rchi-initial.png new file mode 100644 index 00000000..f2a562df Binary files /dev/null and b/A2rchi/interfaces/chat_app/static/images/a2rchi-initial.png differ diff --git a/A2rchi/interfaces/chat_app/static/images/a2rchi.png b/A2rchi/interfaces/chat_app/static/images/a2rchi.png index f2a562df..24402e62 100644 Binary files a/A2rchi/interfaces/chat_app/static/images/a2rchi.png and b/A2rchi/interfaces/chat_app/static/images/a2rchi.png differ diff --git a/A2rchi/interfaces/chat_app/static/script.js b/A2rchi/interfaces/chat_app/static/script.js deleted file mode 100644 index 7f1228e9..00000000 --- a/A2rchi/interfaces/chat_app/static/script.js +++ /dev/null @@ -1,306 +0,0 @@ -const chatInput = document.querySelector("#chat-input"); -const sendButton = document.querySelector("#send-btn"); -const chatContainer = document.querySelector(".chat-container"); -const themeButton = document.querySelector("#theme-btn"); -const deleteButton = document.querySelector("#delete-btn"); -const refreshButton = document.querySelector("#refresh-btn"); -const popupForm = document.getElementById("popup-form"); -const additionalThoughtsInput = document.getElementById("dislike-additional-thoughts"); -const submitButton = document.getElementById("dislike-submit-button"); -const closeButton = document.getElementById("dislike-close-button"); -const correct_checkbox= document.getElementById("correct_checkbox"); -const helpful_checkbox = document.getElementById("helpful_checkbox"); -const appropriate_checkbox = document.getElementById("appropriate_checkbox"); -popupForm.style.display = "none"; - -let userText = null; -let discussion_id = null; -let next_message_id = 0; -let conversation = [] -let num_responses_since_last_rating = 0; - -const loadDataFromLocalstorage = () => { - // Load saved chats and theme from local storage and apply/add on the page - const themeColor = localStorage.getItem("themeColor"); - - document.body.classList.toggle("light-mode", themeColor === "light_mode"); - themeButton.innerText = document.body.classList.contains("light-mode") ? "dark_mode" : "light_mode"; - - const defaultText = `
-

A2rchi

-

Start a conversation and explore the power of A2rchi, specially trained on subMIT.
- Your chat history will be displayed here.

- By using this website, you agree to the terms and conditions.

-
` - - chatContainer.innerHTML = localStorage.getItem("all-chats") || defaultText; - chatContainer.scrollTo(0, chatContainer.scrollHeight); // Scroll to bottom of the chat container -} - -const createChatElement = (content, className) => { - // Create new div and apply chat, specified class and set html content of div - const chatDiv = document.createElement("div"); - chatDiv.classList.add("chat", className); - chatDiv.innerHTML = content; - return chatDiv; // Return the created chat div -} - -const refreshChat = async () => { - conversation.pop(); - if (next_message_id > 0) { - next_message_id = next_message_id -1; - } - chatContainer.removeChild(chatContainer.lastChild); - showTypingAnimation(); -} - -const getChatResponse = async (incomingChatDiv) => { - const API_URL = "http://t3desk019.mit.edu:7861/api/get_chat_response"; - const pElement = document.createElement("div"); - - // Give the p element of the response an id which is equal to the message id - pElement.setAttribute('id',next_message_id.toString()); - next_message_id = next_message_id + 1 - - // Define the properties and data for the API request - const requestOptions = { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - conversation: conversation, - discussion_id: discussion_id, - }) - } - - // Send POST request to Flask API, get response and set the response as paragraph element text - try { - const response = await (await fetch(API_URL, requestOptions)).json(); - pElement.innerHTML = response.response; - pElement.classList.add(".default-text"); - conversation.push(["A2rchi", response.response]); - discussion_id = response.discussion_id ; - } catch (error) { - pElement.classList.add("error"); - pElement.textContent = "Oops! Something went wrong while retrieving the response. Please try again."; - } - - // Remove the typing animation, append the paragraph element and save the chats to local storage - incomingChatDiv.querySelector(".typing-animation").remove(); - incomingChatDiv.querySelector(".chat-details").appendChild(pElement); - localStorage.setItem("all-chats", chatContainer.innerHTML); - chatContainer.scrollTo(0, chatContainer.scrollHeight); -} - -const copyCode = (copyCodeBtn) => { - // Copy the text content of the response to the clipboard - const reponseTextElement = copyCodeBtn.parentElement.parentElement.querySelector(".code-box-body"); - navigator.clipboard.writeText(reponseTextElement.innerText); -} - -const copyResponse = (copyBtn) => { - // Copy the text content of the response to the clipboard - const reponseTextElement = copyBtn.parentElement.previousElementSibling.querySelector("p"); - navigator.clipboard.writeText(reponseTextElement.textContent); -} - -const likeResponse = (likeBtn) => { - num_responses_since_last_rating = 0; - - const chatContent = likeBtn.parentElement.previousElementSibling.querySelector("p").textContent; - - // fill the image - const image = likeBtn.querySelector("img"); - image.src = "/static/images/thumbs_up_filled.png" - - // make sure other image is not filled - const other_image = likeBtn.nextElementSibling.querySelector("img"); - other_image.src = "/static/images/thumbs_down.png"; - - const API_URL = "http://t3desk019.mit.edu:7861/api/like"; - - // Send an API request with the chat content and discussion ID - fetch(API_URL, { - method: "POST", // You may need to adjust the HTTP method - headers: { - "Content-Type": "application/json", - "Access-Control-Allow-Origin": "*", - }, - body: JSON.stringify({ - content: chatContent, - discussion_id: discussion_id, - message_id: likeBtn.parentElement.previousElementSibling.querySelector("p").parentElement.id, - }), - }) -} - -const dislikeResponse = (dislikeBtn) => { - num_responses_since_last_rating = 0; - - const chatContent = dislikeBtn.parentElement.previousElementSibling.querySelector("p").textContent; - - // fill the image - const image = dislikeBtn.querySelector("img"); - image.src = "/static/images/thumbs_down_filled.png"; - - // make sure other image is not filled - const other_image = dislikeBtn.previousElementSibling.querySelector("img"); - other_image.src = "/static/images/thumbs_up.png"; - - const API_URL = "http://t3desk019.mit.edu:7861/api/dislike"; - - // Show pop-up form - popupForm.style.display = "block"; - - // Function which handles sending the information in the pop up form to the backend API - function handleSubmitToAPI() { - const additionalThoughts = additionalThoughtsInput.value; - - fetch(API_URL, { - method: "POST", // You may need to adjust the HTTP method - headers: { - "Content-Type": "application/json", - "Access-Control-Allow-Origin": "*", - }, - body: JSON.stringify({ - content: chatContent, - discussion_id: discussion_id, - message_id: dislikeBtn.parentElement.previousElementSibling.querySelector("p").parentElement.id, - message: additionalThoughts, - incorrect: correct_checkbox.checked, - unhelpful: helpful_checkbox.checked, - inappropriate: appropriate_checkbox.checked, - }), - }); - - //hide pop up formi - popupForm.style.display = "none"; - } - - //wait for user to submit response or close the additional feedback window - submitButton.addEventListener("click", handleSubmitToAPI); - closeButton.addEventListener("click", handleSubmitToAPI); -} - -const closeFeedback = (closeBtn) => { - //hide pop up formi - popupForm.style.display = "none"; -} - -const showTypingAnimation = () => { - // Display the typing animation and call the getChatResponse function - const html = `
-
- chatbot-img -
-
-
-
-
-
-
- - -
-
`; - // Create an incoming chat div with typing animation and append it to chat container - const incomingChatDiv = createChatElement(html, "incoming"); - chatContainer.appendChild(incomingChatDiv); - chatContainer.scrollTo(0, chatContainer.scrollHeight); - getChatResponse(incomingChatDiv); - showFeedbackRequest(); -} - -const showFeedbackRequest = () => { - // Display a message from A2rchi to ask the user to give feedback - - num_responses_since_last_rating = num_responses_since_last_rating + 1; - - const html = `
-
- chatbot-img -
-

I've noticed you haven't rated any of my responses in awhile. Rating responses is crucial because it not only helps me improve, but it also ensures that this project remains open source and freely accessible for everyone. Your input is highly valuable in supporting the A2rchi mission!

-
-
-
`; - - // Create an incoming chat div with feedback request and append it to chat container - if (num_responses_since_last_rating > 2) { - const incomingChatDiv = createChatElement(html, "incoming"); - chatContainer.appendChild(incomingChatDiv); - chatContainer.scrollTo(0, chatContainer.scrollHeight); - num_responses_since_last_rating = 0; - } -} - -const handleOutgoingChat = () => { - userText = chatInput.value.trim(); // Get chatInput value and remove extra spaces - if(!userText) return; // If chatInput is empty return from here - conversation.push(["User", userText]) - - // Clear the input field and reset its height - chatInput.value = ""; - chatInput.style.height = `${initialInputHeight}px`; - - const html = `
-
- user-img -

${userText}

-
-
`; - - // Create an outgoing chat div with user's message and append it to chat container - const outgoingChatDiv = createChatElement(html, "outgoing"); - chatContainer.querySelector(".default-text")?.remove(); - chatContainer.appendChild(outgoingChatDiv); - chatContainer.scrollTo(0, chatContainer.scrollHeight); - setTimeout(showTypingAnimation, 500); -} - -deleteButton.addEventListener("click", () => { - // Remove the chats from local storage and call loadDataFromLocalstorage function - if(confirm("Are you sure you want to delete all the chats?")) { - conversation = [] - discussion_id = null - next_message_id = 0; - localStorage.removeItem("all-chats"); - loadDataFromLocalstorage(); - } -}); - -refreshButton.addEventListener("click", () => { - refreshChat(); -}); - -themeButton.addEventListener("click", () => { - // Toggle body's class for the theme mode and save the updated theme to the local storage - document.body.classList.toggle("light-mode"); - localStorage.setItem("themeColor", themeButton.innerText); - themeButton.innerText = document.body.classList.contains("light-mode") ? "dark_mode" : "light_mode"; -}); - -const initialInputHeight = chatInput.scrollHeight; - -chatInput.addEventListener("input", () => { - // Adjust the height of the input field dynamically based on its content - chatInput.style.height = `${initialInputHeight}px`; - chatInput.style.height = `${chatInput.scrollHeight}px`; -}); - -chatInput.addEventListener("keydown", (e) => { - // If the Enter key is pressed without Shift and the window width is larger - // than 800 pixels, handle the outgoing chat - if (e.key === "Enter" && !e.shiftKey && window.innerWidth > 800) { - e.preventDefault(); - handleOutgoingChat(); - } -}); - -loadDataFromLocalstorage(); -sendButton.addEventListener("click", handleOutgoingChat); diff --git a/A2rchi/interfaces/chat_app/static/script.js-template b/A2rchi/interfaces/chat_app/static/script.js-template index 83f5101e..86743b69 100644 --- a/A2rchi/interfaces/chat_app/static/script.js-template +++ b/A2rchi/interfaces/chat_app/static/script.js-template @@ -14,10 +14,10 @@ const appropriate_checkbox = document.getElementById("appropriate_checkbox"); popupForm.style.display = "none"; let userText = null; -let discussion_id = null; -let next_message_id = 0; +let conversation_id = null; let conversation = [] let num_responses_since_last_rating = 0; +let last_response_is_feedback_request = false; const loadDataFromLocalstorage = () => { // Load saved chats and theme from local storage and apply/add on the page @@ -46,22 +46,30 @@ const createChatElement = (content, className) => { } const refreshChat = async () => { - conversation.pop(); - if (next_message_id > 0) { - next_message_id = next_message_id -1; + // if the conversation is empty; this is a no-op + if (conversation.length == 0) { + return; } + + // remove message to be regenerated from conversation + conversation.pop(); chatContainer.removeChild(chatContainer.lastChild); - showTypingAnimation(); + + // if the last response generated a feedback request, make sure to remove it here as well + console.log(last_response_is_feedback_request) + if (last_response_is_feedback_request) { + console.log("remove again") + chatContainer.removeChild(chatContainer.lastChild); + } + + // generate new response + showTypingAnimation(isRefresh=true); } -const getChatResponse = async (incomingChatDiv) => { +const getChatResponse = async (incomingChatDiv, isRefresh=false) => { const API_URL = "http://XX-HOSTNAME-XX:XX-HTTP_PORT-XX/api/get_chat_response"; const pElement = document.createElement("div"); - // Give the p element of the response an id which is equal to the message id - pElement.setAttribute('id',next_message_id.toString()); - next_message_id = next_message_id + 1 - // Define the properties and data for the API request const requestOptions = { method: "POST", @@ -69,18 +77,21 @@ const getChatResponse = async (incomingChatDiv) => { "Content-Type": "application/json", }, body: JSON.stringify({ - conversation: conversation, - discussion_id: discussion_id, + last_message: conversation.slice(-1), + conversation_id: conversation_id, + is_refresh: isRefresh, }) } - // Send POST request to Flask API, get response and set the response as paragraph element text - try { + // Send POST request to Flask API, get response and set the response as paragraph element text + try { const response = await (await fetch(API_URL, requestOptions)).json(); pElement.innerHTML = response.response; + pElement.setAttribute('id', response.a2rchi_msg_id.toString()); pElement.classList.add(".default-text"); conversation.push(["A2rchi", response.response]); - discussion_id = response.discussion_id ; + conversation_id = response.conversation_id; + last_response_is_feedback_request = false; } catch (error) { pElement.classList.add("error"); pElement.textContent = "Oops! Something went wrong while retrieving the response. Please try again."; @@ -91,6 +102,9 @@ const getChatResponse = async (incomingChatDiv) => { incomingChatDiv.querySelector(".chat-details").appendChild(pElement); localStorage.setItem("all-chats", chatContainer.innerHTML); chatContainer.scrollTo(0, chatContainer.scrollHeight); + + // ask user for feedback if it's been too many messages w/out any feedback + setTimeout(showFeedbackRequest, 500); } const copyCode = (copyCodeBtn) => { @@ -108,8 +122,6 @@ const copyResponse = (copyBtn) => { const likeResponse = (likeBtn) => { num_responses_since_last_rating = 0; - const chatContent = likeBtn.parentElement.previousElementSibling.querySelector("p").textContent; - // fill the image const image = likeBtn.querySelector("img"); image.src = "/static/images/thumbs_up_filled.png" @@ -127,9 +139,7 @@ const likeResponse = (likeBtn) => { "Content-Type": "application/json", "Access-Control-Allow-Origin": "*", }, - body: JSON.stringify({ - content: chatContent, - discussion_id: discussion_id, + body: JSON.stringify({ message_id: likeBtn.parentElement.previousElementSibling.querySelector("p").parentElement.id, }), }) @@ -138,8 +148,6 @@ const likeResponse = (likeBtn) => { const dislikeResponse = (dislikeBtn) => { num_responses_since_last_rating = 0; - const chatContent = dislikeBtn.parentElement.previousElementSibling.querySelector("p").textContent; - // fill the image const image = dislikeBtn.querySelector("img"); image.src = "/static/images/thumbs_down_filled.png"; @@ -164,10 +172,8 @@ const dislikeResponse = (dislikeBtn) => { "Access-Control-Allow-Origin": "*", }, body: JSON.stringify({ - content: chatContent, - discussion_id: discussion_id, message_id: dislikeBtn.parentElement.previousElementSibling.querySelector("p").parentElement.id, - message: additionalThoughts, + feedback_msg: additionalThoughts, incorrect: correct_checkbox.checked, unhelpful: helpful_checkbox.checked, inappropriate: appropriate_checkbox.checked, @@ -179,16 +185,16 @@ const dislikeResponse = (dislikeBtn) => { } //wait for user to submit response or close the additional feedback window - submitButton.addEventListener("click", handleSubmitToAPI); - closeButton.addEventListener("click", handleSubmitToAPI); + submitButton.addEventListener("click", handleSubmitToAPI, {once: true}); + closeButton.addEventListener("click", handleSubmitToAPI, {once: true}); } const closeFeedback = (closeBtn) => { - //hide pop up formi + //hide pop up form popupForm.style.display = "none"; } -const showTypingAnimation = () => { +const showTypingAnimation = (isRefresh=false) => { // Display the typing animation and call the getChatResponse function const html = `
@@ -212,8 +218,7 @@ const showTypingAnimation = () => { const incomingChatDiv = createChatElement(html, "incoming"); chatContainer.appendChild(incomingChatDiv); chatContainer.scrollTo(0, chatContainer.scrollHeight); - getChatResponse(incomingChatDiv); - showFeedbackRequest(); + getChatResponse(incomingChatDiv, isRefresh); } const showFeedbackRequest = () => { @@ -225,7 +230,7 @@ const showFeedbackRequest = () => {
chatbot-img
-

I've noticed you haven't rated any of my responses in awhile. Rating responses is crucial because it not only helps me improve, but it also ensures that this project remains open source and freely accessible for everyone. Your input is highly valuable in supporting the A2rchi mission!

+

I've noticed you haven't rated any of my responses in a while. Rating responses is crucial because it not only helps me improve, but it also ensures that this project remains open source and freely accessible for everyone. Your input is highly valuable in supporting the A2rchi mission!

`; @@ -236,6 +241,7 @@ const showFeedbackRequest = () => { chatContainer.appendChild(incomingChatDiv); chatContainer.scrollTo(0, chatContainer.scrollHeight); num_responses_since_last_rating = 0; + last_response_is_feedback_request = true; } } @@ -267,8 +273,7 @@ deleteButton.addEventListener("click", () => { // Remove the chats from local storage and call loadDataFromLocalstorage function if(confirm("Are you sure you want to delete all the chats?")) { conversation = [] - discussion_id = null - next_message_id = 0; + conversation_id = null localStorage.removeItem("all-chats"); loadDataFromLocalstorage(); } diff --git a/A2rchi/interfaces/cleo.py b/A2rchi/interfaces/cleo.py index 557b9ef6..41a18960 100644 --- a/A2rchi/interfaces/cleo.py +++ b/A2rchi/interfaces/cleo.py @@ -20,7 +20,6 @@ class CleoAIWrapper: def __init__(self): self.chain = Chain() - self.number_of_queries = 0 #TODO: finish installing this safegaurd. # initialize data manager self.data_manager = DataManager() diff --git a/A2rchi/utils/sql.py b/A2rchi/utils/sql.py new file mode 100644 index 00000000..c2b780db --- /dev/null +++ b/A2rchi/utils/sql.py @@ -0,0 +1,6 @@ +"""SQL queries used by A2rchi""" +SQL_INSERT_CONVO = "INSERT INTO conversations (conversation_id, sender, content, ts) VALUES %s RETURNING message_id;" + +SQL_INSERT_FEEDBACK = "INSERT INTO feedback (mid, feedback_ts, feedback, feedback_msg, incorrect, unhelpful, inappropriate) VALUES (%s, %s, %s, %s, %s, %s, %s);" + +SQL_QUERY_CONVO = "SELECT sender, content FROM conversations WHERE conversation_id = %s ORDER BY message_id ASC;" diff --git a/config/dev-config.yaml b/config/dev-config.yaml index 404403e2..d6c1e041 100644 --- a/config/dev-config.yaml +++ b/config/dev-config.yaml @@ -13,7 +13,7 @@ interfaces: PORT: 7861 EXTERNAL_PORT: 7682 HOST: "0.0.0.0" # either "0.0.0.0" (for public) or "127.0.0.1" (for internal) - HOSTNAME: "t3desk019.mit.edu" # careful, this is used for the chat service + HOSTNAME: "submit06.mit.edu" # careful, this is used for the chat service template_folder: "/root/A2rchi/A2rchi/interfaces/chat_app/templates" static_folder: "/root/A2rchi/A2rchi/interfaces/chat_app/static" num_responses_until_feedback: 3 #the number of responses given by A2rchi until she asks for feedback. @@ -83,6 +83,11 @@ utils: mailbox: IMAP4_PORT: 143 mailbox_update_time: 10 + postgres: + port: 5432 + user: a2rchi + database: a2rchi-db + host: dev-postgres-dev-1 data_manager: CHUNK_SIZE: 1000 CHUNK_OVERLAP: 0 diff --git a/config/prod-801-config.yaml b/config/prod-801-config.yaml index 15c0f772..8feda008 100644 --- a/config/prod-801-config.yaml +++ b/config/prod-801-config.yaml @@ -81,6 +81,11 @@ utils: mailbox: IMAP4_PORT: 143 mailbox_update_time: 10 + postgres: + port: 5432 + user: a2rchi + database: a2rchi-db + host: prod-801-postgres-prod-801-1 data_manager: CHUNK_SIZE: 1000 CHUNK_OVERLAP: 0 diff --git a/config/prod-config.yaml b/config/prod-config.yaml index f373016c..f8c4ef30 100644 --- a/config/prod-config.yaml +++ b/config/prod-config.yaml @@ -81,6 +81,11 @@ utils: mailbox: IMAP4_PORT: 143 mailbox_update_time: 10 + postgres: + port: 5432 + user: a2rchi + database: a2rchi-db + host: prod-postgres-prod-1 data_manager: CHUNK_SIZE: 1000 CHUNK_OVERLAP: 0 diff --git a/config/prod-root-config.yaml b/config/prod-root-config.yaml index 782a1820..6dfecfab 100644 --- a/config/prod-root-config.yaml +++ b/config/prod-root-config.yaml @@ -82,6 +82,11 @@ utils: mailbox: IMAP4_PORT: 143 mailbox_update_time: 10 + postgres: + port: 5432 + user: a2rchi + database: a2rchi-db + host: prod-root-postgres-prod-root-1 data_manager: CHUNK_SIZE: 1000 CHUNK_OVERLAP: 0 diff --git a/deploy/dev/dev-compose.yaml b/deploy/dev/dev-compose.yaml index 369897c1..55113b5b 100644 --- a/deploy/dev/dev-compose.yaml +++ b/deploy/dev/dev-compose.yaml @@ -9,6 +9,8 @@ services: depends_on: chromadb-dev: condition: service_healthy + postgres-dev: + condition: service_healthy environment: RUNTIME_ENV: dev CLEO_URL_FILE: /run/secrets/cleo_url @@ -22,6 +24,7 @@ services: SENDER_PW_FILE: /run/secrets/sender_pw OPENAI_API_KEY_FILE: /run/secrets/openai_api_key HUGGING_FACE_HUB_TOKEN_FILE: /run/secrets/hf_token + POSTGRES_PASSWORD_FILE: /run/secrets/pg_password secrets: - cleo_url - cleo_user @@ -34,6 +37,7 @@ services: - sender_pw - openai_api_key - hf_token + - pg_password volumes: - a2rchi-dev-data:/root/data/ logging: @@ -51,13 +55,17 @@ services: depends_on: chromadb-dev: condition: service_healthy + postgres-dev: + condition: service_healthy environment: RUNTIME_ENV: dev OPENAI_API_KEY_FILE: /run/secrets/openai_api_key HUGGING_FACE_HUB_TOKEN_FILE: /run/secrets/hf_token + POSTGRES_PASSWORD_FILE: /run/secrets/pg_password secrets: - openai_api_key - hf_token + - pg_password volumes: - a2rchi-dev-data:/root/data/ logging: @@ -77,6 +85,8 @@ services: depends_on: chromadb-dev: condition: service_healthy + postgres-dev: + condition: service_healthy environment: RUNTIME_ENV: dev IMAP_USER_FILE: /run/secrets/imap_user @@ -121,6 +131,8 @@ services: depends_on: chromadb-dev: condition: service_healthy + postgres-dev: + condition: service_healthy environment: RUNTIME_ENV: dev FLASK_UPLOADER_APP_SECRET_KEY_FILE: /run/secrets/flask_uploader_app_secret_key @@ -165,9 +177,33 @@ services: start_period: 10s start_interval: 5s + postgres-dev: + image: postgres:16 + environment: + RUNTIME_ENV: dev + POSTGRES_PASSWORD_FILE: /run/secrets/pg_password + POSTGRES_USER: a2rchi + POSTGRES_DB: a2rchi-db + secrets: + - pg_password + volumes: + - ./dev-init.sql:/docker-entrypoint-initdb.d/init.sql + - a2rchi-dev-pg-data:/var/lib/postgresql/data + logging: + options: + max-size: 10m + restart: always + healthcheck: + test: ["CMD-SHELL", "pg_isready -U a2rchi -d a2rchi-db"] + interval: 10s + timeout: 5s + retries: 5 + volumes: a2rchi-dev-data: external: true + a2rchi-dev-pg-data: + external: true secrets: imap_user: @@ -200,3 +236,5 @@ secrets: file: secrets/openai_api_key.txt hf_token: file: secrets/hf_token.txt + pg_password: + file: secrets/pg_password.txt diff --git a/deploy/dev/dev-init.sql b/deploy/dev/dev-init.sql new file mode 100644 index 00000000..90d49c61 --- /dev/null +++ b/deploy/dev/dev-init.sql @@ -0,0 +1,19 @@ +CREATE TABLE IF NOT EXISTS conversations ( + conversation_id INTEGER NOT NULL, + message_id SERIAL, + sender TEXT NOT NULL, + content TEXT NOT NULL, + ts TIMESTAMP NOT NULL, + PRIMARY KEY (message_id) +); +CREATE TABLE IF NOT EXISTS feedback ( + mid INTEGER NOT NULL, + feedback_ts TIMESTAMP NOT NULL, + feedback TEXT NOT NULL, + feedback_msg TEXT, + incorrect BOOLEAN, + unhelpful BOOLEAN, + inappropriate BOOLEAN, + PRIMARY KEY (mid, feedback_ts), + FOREIGN KEY (mid) REFERENCES conversations(message_id) +); \ No newline at end of file diff --git a/deploy/dev/dev-install.sh b/deploy/dev/dev-install.sh index 29180feb..ba11b725 100755 --- a/deploy/dev/dev-install.sh +++ b/deploy/dev/dev-install.sh @@ -1,11 +1,17 @@ #!/bin/bash -# create volume if it doesn't already exist +# create volume if it doesn't already exist for app data exists=`docker volume ls | awk '{print $2}' | grep a2rchi-dev-data` if [[ $exists != 'a2rchi-dev-data' ]]; then docker volume create --name a2rchi-dev-data fi +# create volume if it doesn't already exist for postgres data +exists=`docker volume ls | awk '{print $2}' | grep a2rchi-dev-pg-data` +if [[ $exists != 'a2rchi-dev-pg-data' ]]; then + docker volume create --name a2rchi-dev-pg-data +fi + # build base image; try to reuse previously built image cd A2rchi-dev/deploy/dev/ docker build -f ../dockerfiles/Dockerfile-base -t a2rchi-base:BASE_TAG ../.. diff --git a/deploy/prod-801/prod-801-compose.yaml b/deploy/prod-801/prod-801-compose.yaml index 887f8733..50612dce 100644 --- a/deploy/prod-801/prod-801-compose.yaml +++ b/deploy/prod-801/prod-801-compose.yaml @@ -81,9 +81,33 @@ services: start_period: 10s start_interval: 5s + postgres-prod-801: + image: postgres:16 + environment: + RUNTIME_ENV: prod-801 + POSTGRES_PASSWORD_FILE: /run/secrets/pg_password + POSTGRES_USER: a2rchi + POSTGRES_DB: a2rchi-db + secrets: + - pg_password + volumes: + - ./prod-801-init.sql:/docker-entrypoint-initdb.d/init.sql + - a2rchi-prod-801-pg-data:/var/lib/postgresql/data + logging: + options: + max-size: 10m + restart: always + healthcheck: + test: ["CMD-SHELL", "pg_isready -U a2rchi -d a2rchi-db"] + interval: 10s + timeout: 5s + retries: 5 + volumes: a2rchi-prod-801-data: external: true + a2rchi-prod-801-pg-data: + external: true secrets: flask_uploader_app_secret_key: @@ -94,3 +118,5 @@ secrets: file: secrets/openai_api_key.txt hf_token: file: secrets/hf_token.txt + pg_password: + file: secrets/pg_password.txt diff --git a/deploy/prod-801/prod-801-init.sql b/deploy/prod-801/prod-801-init.sql new file mode 100644 index 00000000..90d49c61 --- /dev/null +++ b/deploy/prod-801/prod-801-init.sql @@ -0,0 +1,19 @@ +CREATE TABLE IF NOT EXISTS conversations ( + conversation_id INTEGER NOT NULL, + message_id SERIAL, + sender TEXT NOT NULL, + content TEXT NOT NULL, + ts TIMESTAMP NOT NULL, + PRIMARY KEY (message_id) +); +CREATE TABLE IF NOT EXISTS feedback ( + mid INTEGER NOT NULL, + feedback_ts TIMESTAMP NOT NULL, + feedback TEXT NOT NULL, + feedback_msg TEXT, + incorrect BOOLEAN, + unhelpful BOOLEAN, + inappropriate BOOLEAN, + PRIMARY KEY (mid, feedback_ts), + FOREIGN KEY (mid) REFERENCES conversations(message_id) +); \ No newline at end of file diff --git a/deploy/prod-801/prod-801-install.sh b/deploy/prod-801/prod-801-install.sh index 6ecab7e7..867b4607 100644 --- a/deploy/prod-801/prod-801-install.sh +++ b/deploy/prod-801/prod-801-install.sh @@ -6,6 +6,12 @@ if [[ $exists != 'a2rchi-prod-801-data' ]]; then docker volume create --name a2rchi-prod-801-data fi +# create volume if it doesn't already exist for postgres data +exists=`docker volume ls | awk '{print $2}' | grep a2rchi-prod-801-pg-data` +if [[ $exists != 'a2rchi-prod-801-pg-data' ]]; then + docker volume create --name a2rchi-prod-801-pg-data +fi + # build base image; try to reuse previously built image cd A2rchi-prod-801/deploy/prod-801/ docker build -f ../dockerfiles/Dockerfile-base -t a2rchi-base:BASE_TAG ../.. diff --git a/deploy/prod-root/prod-root-compose.yaml b/deploy/prod-root/prod-root-compose.yaml index 844528e8..f581b2d7 100644 --- a/deploy/prod-root/prod-root-compose.yaml +++ b/deploy/prod-root/prod-root-compose.yaml @@ -80,9 +80,33 @@ services: start_period: 10s start_interval: 5s + postgres-prod-root: + image: postgres:16 + environment: + RUNTIME_ENV: prod-root + POSTGRES_PASSWORD_FILE: /run/secrets/pg_password + POSTGRES_USER: a2rchi + POSTGRES_DB: a2rchi-db + secrets: + - pg_password + volumes: + - ./prod-root-init.sql:/docker-entrypoint-initdb.d/init.sql + - a2rchi-prod-root-pg-data:/var/lib/postgresql/data + logging: + options: + max-size: 10m + restart: always + healthcheck: + test: ["CMD-SHELL", "pg_isready -U a2rchi -d a2rchi-db"] + interval: 10s + timeout: 5s + retries: 5 + volumes: a2rchi-prod-root-data: external: true + a2rchi-prod-root-pg-data: + external: true secrets: flask_uploader_app_secret_key: @@ -92,4 +116,6 @@ secrets: openai_api_key: file: secrets/openai_api_key.txt hf_token: - file: secrets/hf_token.txt \ No newline at end of file + file: secrets/hf_token.txt + pg_password: + file: secrets/pg_password.txt diff --git a/deploy/prod-root/prod-root-init.sql b/deploy/prod-root/prod-root-init.sql new file mode 100644 index 00000000..90d49c61 --- /dev/null +++ b/deploy/prod-root/prod-root-init.sql @@ -0,0 +1,19 @@ +CREATE TABLE IF NOT EXISTS conversations ( + conversation_id INTEGER NOT NULL, + message_id SERIAL, + sender TEXT NOT NULL, + content TEXT NOT NULL, + ts TIMESTAMP NOT NULL, + PRIMARY KEY (message_id) +); +CREATE TABLE IF NOT EXISTS feedback ( + mid INTEGER NOT NULL, + feedback_ts TIMESTAMP NOT NULL, + feedback TEXT NOT NULL, + feedback_msg TEXT, + incorrect BOOLEAN, + unhelpful BOOLEAN, + inappropriate BOOLEAN, + PRIMARY KEY (mid, feedback_ts), + FOREIGN KEY (mid) REFERENCES conversations(message_id) +); \ No newline at end of file diff --git a/deploy/prod-root/prod-root-install.sh b/deploy/prod-root/prod-root-install.sh index 11639d86..dfc041bd 100644 --- a/deploy/prod-root/prod-root-install.sh +++ b/deploy/prod-root/prod-root-install.sh @@ -6,6 +6,12 @@ if [[ $exists != 'a2rchi-prod-root-data' ]]; then docker volume create --name a2rchi-prod-root-data fi +# create volume if it doesn't already exist for postgres data +exists=`docker volume ls | awk '{print $2}' | grep a2rchi-prod-root-pg-data` +if [[ $exists != 'a2rchi-prod-root-pg-data' ]]; then + docker volume create --name a2rchi-prod-root-pg-data +fi + # build base image; try to reuse previously built image cd A2rchi-prod-root/deploy/prod-root/ docker build -f ../dockerfiles/Dockerfile-base -t a2rchi-base:BASE_TAG ../.. diff --git a/deploy/prod/prod-compose.yaml b/deploy/prod/prod-compose.yaml index 98d32cfb..8bf7d66a 100644 --- a/deploy/prod/prod-compose.yaml +++ b/deploy/prod/prod-compose.yaml @@ -165,9 +165,33 @@ services: start_period: 10s start_interval: 5s + postgres-prod: + image: postgres:16 + environment: + RUNTIME_ENV: prod + POSTGRES_PASSWORD_FILE: /run/secrets/pg_password + POSTGRES_USER: a2rchi + POSTGRES_DB: a2rchi-db + secrets: + - pg_password + volumes: + - ./prod-init.sql:/docker-entrypoint-initdb.d/init.sql + - a2rchi-prod-pg-data:/var/lib/postgresql/data + logging: + options: + max-size: 10m + restart: always + healthcheck: + test: ["CMD-SHELL", "pg_isready -U a2rchi -d a2rchi-db"] + interval: 10s + timeout: 5s + retries: 5 + volumes: a2rchi-prod-data: external: true + a2rchi-prod-pg-data: + external: true secrets: imap_user: @@ -200,3 +224,5 @@ secrets: file: secrets/openai_api_key.txt hf_token: file: secrets/hf_token.txt + pg_password: + file: secrets/pg_password.txt diff --git a/deploy/prod/prod-init.sql b/deploy/prod/prod-init.sql new file mode 100644 index 00000000..90d49c61 --- /dev/null +++ b/deploy/prod/prod-init.sql @@ -0,0 +1,19 @@ +CREATE TABLE IF NOT EXISTS conversations ( + conversation_id INTEGER NOT NULL, + message_id SERIAL, + sender TEXT NOT NULL, + content TEXT NOT NULL, + ts TIMESTAMP NOT NULL, + PRIMARY KEY (message_id) +); +CREATE TABLE IF NOT EXISTS feedback ( + mid INTEGER NOT NULL, + feedback_ts TIMESTAMP NOT NULL, + feedback TEXT NOT NULL, + feedback_msg TEXT, + incorrect BOOLEAN, + unhelpful BOOLEAN, + inappropriate BOOLEAN, + PRIMARY KEY (mid, feedback_ts), + FOREIGN KEY (mid) REFERENCES conversations(message_id) +); \ No newline at end of file diff --git a/deploy/prod/prod-install.sh b/deploy/prod/prod-install.sh index 7bedf15c..f6c1410d 100755 --- a/deploy/prod/prod-install.sh +++ b/deploy/prod/prod-install.sh @@ -6,6 +6,12 @@ if [[ $exists != 'a2rchi-prod-data' ]]; then docker volume create --name a2rchi-prod-data fi +# create volume if it doesn't already exist for postgres data +exists=`docker volume ls | awk '{print $2}' | grep a2rchi-prod-pg-data` +if [[ $exists != 'a2rchi-prod-pg-data' ]]; then + docker volume create --name a2rchi-prod-pg-data +fi + # build base image; try to reuse previously built image cd A2rchi-prod/deploy/prod/ docker build -f ../dockerfiles/Dockerfile-base -t a2rchi-base:BASE_TAG ../.. diff --git a/pyproject.toml b/pyproject.toml index b76db279..792a6ec5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ "pandas==2.1.0", "peft==0.5.0", "posthog==3.0.1", + "psycopg2==2.9.9", "pulsar-client==3.2.0", "pygments==2.16.1", "pypdf==3.16.1",