diff --git a/.github/workflows/dev-ci-cd.yaml b/.github/workflows/dev-ci-cd.yaml index 0bde2784..2c924d83 100644 --- a/.github/workflows/dev-ci-cd.yaml +++ b/.github/workflows/dev-ci-cd.yaml @@ -30,54 +30,27 @@ jobs: - name: Create Secrets Files run: | mkdir -p ${{ github.workspace }}/deploy/dev/secrets/ - touch ${{ github.workspace }}/deploy/dev/secrets/imap_user.txt - echo "${{ secrets.DEV_IMAP_USER }}" >> ${{ github.workspace }}/deploy/dev/secrets/imap_user.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/imap_user.txt - touch ${{ github.workspace }}/deploy/dev/secrets/imap_pw.txt - echo "${{ secrets.DEV_IMAP_PW }}" >> ${{ github.workspace }}/deploy/dev/secrets/imap_pw.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/imap_pw.txt - touch ${{ github.workspace }}/deploy/dev/secrets/cleo_url.txt - echo "${{ secrets.DEV_CLEO_URL }}" >> ${{ github.workspace }}/deploy/dev/secrets/cleo_url.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/cleo_url.txt - touch ${{ github.workspace }}/deploy/dev/secrets/cleo_user.txt - echo "${{ secrets.DEV_CLEO_USER }}" >> ${{ github.workspace }}/deploy/dev/secrets/cleo_user.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/cleo_user.txt - touch ${{ github.workspace }}/deploy/dev/secrets/cleo_pw.txt - echo "${{ secrets.DEV_CLEO_PW }}" >> ${{ github.workspace }}/deploy/dev/secrets/cleo_pw.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/cleo_pw.txt - touch ${{ github.workspace }}/deploy/dev/secrets/cleo_project.txt - echo "${{ secrets.DEV_CLEO_PROJECT }}" >> ${{ github.workspace }}/deploy/dev/secrets/cleo_project.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/cleo_project.txt - touch ${{ github.workspace }}/deploy/dev/secrets/sender_server.txt - echo "${{ secrets.DEV_SENDER_SERVER }}" >> ${{ github.workspace }}/deploy/dev/secrets/sender_server.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/sender_server.txt - touch ${{ github.workspace 
}}/deploy/dev/secrets/sender_port.txt - echo "${{ secrets.DEV_SENDER_PORT }}" >> ${{ github.workspace }}/deploy/dev/secrets/sender_port.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/sender_port.txt - touch ${{ github.workspace }}/deploy/dev/secrets/sender_replyto.txt - echo "${{ secrets.DEV_SENDER_REPLYTO }}" >> ${{ github.workspace }}/deploy/dev/secrets/sender_replyto.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/sender_replyto.txt - touch ${{ github.workspace }}/deploy/dev/secrets/sender_user.txt - echo "${{ secrets.DEV_SENDER_USER }}" >> ${{ github.workspace }}/deploy/dev/secrets/sender_user.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/sender_user.txt - touch ${{ github.workspace }}/deploy/dev/secrets/sender_pw.txt - echo "${{ secrets.DEV_SENDER_PW }}" >> ${{ github.workspace }}/deploy/dev/secrets/sender_pw.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/sender_pw.txt - touch ${{ github.workspace }}/deploy/dev/secrets/flask_uploader_app_secret_key.txt - echo "${{ secrets.DEV_FLASK_UPLOADER_APP_SECRET_KEY }}" >> ${{ github.workspace }}/deploy/dev/secrets/flask_uploader_app_secret_key.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/flask_uploader_app_secret_key.txt - touch ${{ github.workspace }}/deploy/dev/secrets/uploader_salt.txt - echo "${{ secrets.DEV_UPLOADER_SALT }}" >> ${{ github.workspace }}/deploy/dev/secrets/uploader_salt.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/uploader_salt.txt - touch ${{ github.workspace }}/deploy/dev/secrets/openai_api_key.txt - echo "${{ secrets.OPENAI_API_KEY }}" >> ${{ github.workspace }}/deploy/dev/secrets/openai_api_key.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/openai_api_key.txt - touch ${{ github.workspace }}/deploy/dev/secrets/hf_token.txt - echo "${{ secrets.HF_TOKEN }}" >> ${{ github.workspace }}/deploy/dev/secrets/hf_token.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/hf_token.txt - touch ${{ github.workspace 
}}/deploy/dev/secrets/pg_password.txt - echo "${{ secrets.DEV_PG_PASSWORD }}" >> ${{ github.workspace }}/deploy/dev/secrets/pg_password.txt - chmod 400 ${{ github.workspace }}/deploy/dev/secrets/pg_password.txt + workspace=${{ github.workspace }} + env="dev" + sed -i "s/WORKSPACE/${workspace//\//\\/}/" ${workspace}/deploy/create_secret.sh + sed -i "s/ENV/${env}/" ${workspace}/deploy/create_secret.sh + /bin/bash ${workspace}/deploy/create_secret.sh imap_user.txt ${{ secrets.DEV_IMAP_USER }} + /bin/bash ${workspace}/deploy/create_secret.sh imap_pw.txt ${{ secrets.DEV_IMAP_PW }} + /bin/bash ${workspace}/deploy/create_secret.sh cleo_url.txt ${{ secrets.DEV_CLEO_URL }} + /bin/bash ${workspace}/deploy/create_secret.sh cleo_user.txt ${{ secrets.DEV_CLEO_USER }} + /bin/bash ${workspace}/deploy/create_secret.sh cleo_pw.txt ${{ secrets.DEV_CLEO_PW }} + /bin/bash ${workspace}/deploy/create_secret.sh cleo_project.txt ${{ secrets.DEV_CLEO_PROJECT }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_server.txt ${{ secrets.DEV_SENDER_SERVER }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_port.txt ${{ secrets.DEV_SENDER_PORT }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_replyto.txt ${{ secrets.DEV_SENDER_REPLYTO }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_user.txt ${{ secrets.DEV_SENDER_USER }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_pw.txt ${{ secrets.DEV_SENDER_PW }} + /bin/bash ${workspace}/deploy/create_secret.sh flask_uploader_app_secret_key.txt ${{ secrets.DEV_FLASK_UPLOADER_APP_SECRET_KEY }} + /bin/bash ${workspace}/deploy/create_secret.sh uploader_salt.txt ${{ secrets.DEV_UPLOADER_SALT }} + /bin/bash ${workspace}/deploy/create_secret.sh openai_api_key.txt ${{ secrets.OPENAI_API_KEY }} + /bin/bash ${workspace}/deploy/create_secret.sh hf_token.txt ${{ secrets.HF_TOKEN }} + /bin/bash ${workspace}/deploy/create_secret.sh pg_password.txt ${{ secrets.DEV_PG_PASSWORD }} + /bin/bash ${workspace}/deploy/create_secret.sh 
grafana_password.txt ${{ secrets.DEV_GRAFANA_PG_PASSWORD }} # create env file to set tag(s) for docker-compose - name: Create Env File @@ -87,6 +60,11 @@ jobs: export tag="${tag//\//-}.${GITHUB_SHA}" echo "TAG=${tag}" >> ${{ github.workspace }}/deploy/dev/.env + # create deployment directory if it doesn't already exist + - name: Create Directory + run: | + ssh submit06 "mkdir -p ~/A2rchi-dev/" + # stop any existing docker compose that's running - name: Stop Docker Compose run: | diff --git a/.github/workflows/prod-65830-ci-cd.yaml b/.github/workflows/prod-65830-ci-cd.yaml new file mode 100644 index 00000000..86936f5b --- /dev/null +++ b/.github/workflows/prod-65830-ci-cd.yaml @@ -0,0 +1,82 @@ +name: Deploy A2rchi Prod for 6.5830 +run-name: ${{ github.actor }} deploys A2rchi for 6.5830 to prod +on: + push: + branches: + - release-65830 +jobs: + deploy-prod-system: + runs-on: ubuntu-latest + env: + SSH_AUTH_SOCK: /tmp/ssh_agent.sock + steps: + # boilerplate message and pull repository to CI runner + - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event." + - uses: actions/checkout@v3 + - run: echo "The ${{ github.repository }} repository has been cloned to the runner." 
+ + # setup SSH + - name: Setup SSH + run: | + mkdir -p /home/runner/.ssh/ + echo "${{ secrets.SSH_PRIVATE_KEY_MDRUSSO }}" > /home/runner/.ssh/id_rsa_submit + chmod 600 /home/runner/.ssh/id_rsa_submit + echo "${{ secrets.SSH_SUBMIT_KNOWN_HOSTS }}" > ~/.ssh/known_hosts + cp ${{ github.workspace }}/deploy/ssh_config /home/runner/.ssh/config + ssh-agent -a $SSH_AUTH_SOCK > /dev/null + ssh-add /home/runner/.ssh/id_rsa_submit + + # create secrets files for docker-compose + - name: Create Secrets Files + run: | + mkdir -p ${{ github.workspace }}/deploy/prod-65830/secrets/ + workspace=${{ github.workspace }} + env="prod-65830" + sed -i "s/WORKSPACE/${workspace//\//\\/}/" ${workspace}/deploy/create_secret.sh + sed -i "s/ENV/${env}/" ${workspace}/deploy/create_secret.sh + /bin/bash ${workspace}/deploy/create_secret.sh flask_uploader_app_secret_key.txt ${{ secrets.PROD_FLASK_UPLOADER_APP_SECRET_KEY }} + /bin/bash ${workspace}/deploy/create_secret.sh uploader_salt.txt ${{ secrets.PROD_UPLOADER_SALT }} + /bin/bash ${workspace}/deploy/create_secret.sh openai_api_key.txt ${{ secrets.OPENAI_API_KEY }} + /bin/bash ${workspace}/deploy/create_secret.sh hf_token.txt ${{ secrets.HF_TOKEN }} + /bin/bash ${workspace}/deploy/create_secret.sh piazza_email.txt ${{ secrets.PROD_65830_PIAZZA_EMAIL }} + /bin/bash ${workspace}/deploy/create_secret.sh piazza_password.txt ${{ secrets.PROD_65830_PIAZZA_PASSWORD }} + /bin/bash ${workspace}/deploy/create_secret.sh slack_webhook.txt ${{ secrets.PROD_65830_SLACK_WEBHOOK }} + + # create env file to set tag(s) for docker-compose + - name: Create Env File + run: | + touch ${{ github.workspace }}/deploy/prod-65830/.env + export tag="${GITHUB_REF#refs/heads/}" + export tag="${tag//\//-}.${GITHUB_SHA}" + echo "TAG=${tag}" >> ${{ github.workspace }}/deploy/prod-65830/.env + + # create deployment directory if it doesn't already exist + - name: Create Directory + run: | + ssh submit06 "mkdir -p ~/A2rchi-prod-65830/" + + # stop any existing docker compose 
that's running + - name: Stop Docker Compose + run: | + ssh submit06 'bash -s' < ${{ github.workspace }}/deploy/prod-65830/prod-65830-stop.sh + + # copy repository to machine + - name: Copy Repository + run: | + rsync -e ssh -r ${{ github.workspace}}/* --exclude .git/ --delete submit06:~/A2rchi-prod-65830/ + + # run deploy script + - name: Run Deploy Script + run: | + export tag="${GITHUB_REF#refs/heads/}" + export tag="${tag//\//-}.${GITHUB_SHA}" + sed -i "s/BASE_TAG/${tag}/" ${{ github.workspace }}/deploy/prod-65830/prod-65830-install.sh + ssh submit06 'bash -s' < ${{ github.workspace }}/deploy/prod-65830/prod-65830-install.sh + + # clean up secret files + - name: Remove Secrets from Runner + run: | + rm ${{ github.workspace }}/deploy/prod-65830/secrets/*.txt + + # print job status + - run: echo "🍏 This job's status is ${{ job.status }}." \ No newline at end of file diff --git a/.github/workflows/prod-801-ci-cd.yaml b/.github/workflows/prod-801-ci-cd.yaml index 9d9429b6..9728ceb4 100644 --- a/.github/workflows/prod-801-ci-cd.yaml +++ b/.github/workflows/prod-801-ci-cd.yaml @@ -30,21 +30,16 @@ jobs: - name: Create Secrets Files run: | mkdir -p ${{ github.workspace }}/deploy/prod-801/secrets/ - touch ${{ github.workspace }}/deploy/prod-801/secrets/flask_uploader_app_secret_key.txt - echo "${{ secrets.PROD_FLASK_UPLOADER_APP_SECRET_KEY }}" >> ${{ github.workspace }}/deploy/prod-801/secrets/flask_uploader_app_secret_key.txt - chmod 400 ${{ github.workspace }}/deploy/prod-801/secrets/flask_uploader_app_secret_key.txt - touch ${{ github.workspace }}/deploy/prod-801/secrets/uploader_salt.txt - echo "${{ secrets.PROD_UPLOADER_SALT }}" >> ${{ github.workspace }}/deploy/prod-801/secrets/uploader_salt.txt - chmod 400 ${{ github.workspace }}/deploy/prod-801/secrets/uploader_salt.txt - touch ${{ github.workspace }}/deploy/prod-801/secrets/openai_api_key.txt - echo "${{ secrets.OPENAI_API_KEY }}" >> ${{ github.workspace }}/deploy/prod-801/secrets/openai_api_key.txt - chmod 400 
${{ github.workspace }}/deploy/prod-801/secrets/openai_api_key.txt - touch ${{ github.workspace }}/deploy/prod-801/secrets/hf_token.txt - echo "${{ secrets.HF_TOKEN }}" >> ${{ github.workspace }}/deploy/prod-801/secrets/hf_token.txt - chmod 400 ${{ github.workspace }}/deploy/prod-801/secrets/hf_token.txt - touch ${{ github.workspace }}/deploy/prod-801/secrets/pg_password.txt - echo "${{ secrets.PROD_801_PG_PASSWORD }}" >> ${{ github.workspace }}/deploy/prod-801/secrets/pg_password.txt - chmod 400 ${{ github.workspace }}/deploy/prod-801/secrets/pg_password.txt + workspace=${{ github.workspace }} + env="prod-801" + sed -i "s/WORKSPACE/${workspace//\//\\/}/" ${workspace}/deploy/create_secret.sh + sed -i "s/ENV/${env}/" ${workspace}/deploy/create_secret.sh + /bin/bash ${workspace}/deploy/create_secret.sh flask_uploader_app_secret_key.txt ${{ secrets.PROD_FLASK_UPLOADER_APP_SECRET_KEY }} + /bin/bash ${workspace}/deploy/create_secret.sh uploader_salt.txt ${{ secrets.PROD_UPLOADER_SALT }} + /bin/bash ${workspace}/deploy/create_secret.sh openai_api_key.txt ${{ secrets.OPENAI_API_KEY }} + /bin/bash ${workspace}/deploy/create_secret.sh hf_token.txt ${{ secrets.HF_TOKEN }} + /bin/bash ${workspace}/deploy/create_secret.sh pg_password.txt ${{ secrets.PROD_801_PG_PASSWORD }} + /bin/bash ${workspace}/deploy/create_secret.sh grafana_password.txt ${{ secrets.PROD_801_GRAFANA_PG_PASSWORD }} # create env file to set tag(s) for docker-compose - name: Create Env File @@ -54,6 +49,11 @@ jobs: export tag="${tag//\//-}.${GITHUB_SHA}" echo "TAG=${tag}" >> ${{ github.workspace }}/deploy/prod-801/.env + # create deployment directory if it doesn't already exist + - name: Create Directory + run: | + ssh submit-t3desk "mkdir -p ~/A2rchi-prod-801/" + # stop any existing docker compose that's running - name: Stop Docker Compose run: | diff --git a/.github/workflows/prod-ci-cd.yaml b/.github/workflows/prod-ci-cd.yaml index ad035343..c33c66a0 100644 --- a/.github/workflows/prod-ci-cd.yaml +++ 
b/.github/workflows/prod-ci-cd.yaml @@ -30,54 +30,27 @@ jobs: - name: Create Secrets Files run: | mkdir -p ${{ github.workspace }}/deploy/prod/secrets/ - touch ${{ github.workspace }}/deploy/prod/secrets/imap_user.txt - echo "${{ secrets.PROD_IMAP_USER }}" >> ${{ github.workspace }}/deploy/prod/secrets/imap_user.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/imap_user.txt - touch ${{ github.workspace }}/deploy/prod/secrets/imap_pw.txt - echo "${{ secrets.PROD_IMAP_PW }}" >> ${{ github.workspace }}/deploy/prod/secrets/imap_pw.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/imap_pw.txt - touch ${{ github.workspace }}/deploy/prod/secrets/cleo_url.txt - echo "${{ secrets.PROD_CLEO_URL }}" >> ${{ github.workspace }}/deploy/prod/secrets/cleo_url.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/cleo_url.txt - touch ${{ github.workspace }}/deploy/prod/secrets/cleo_user.txt - echo "${{ secrets.PROD_CLEO_USER }}" >> ${{ github.workspace }}/deploy/prod/secrets/cleo_user.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/cleo_user.txt - touch ${{ github.workspace }}/deploy/prod/secrets/cleo_pw.txt - echo "${{ secrets.PROD_CLEO_PW }}" >> ${{ github.workspace }}/deploy/prod/secrets/cleo_pw.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/cleo_pw.txt - touch ${{ github.workspace }}/deploy/prod/secrets/cleo_project.txt - echo "${{ secrets.PROD_CLEO_PROJECT }}" >> ${{ github.workspace }}/deploy/prod/secrets/cleo_project.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/cleo_project.txt - touch ${{ github.workspace }}/deploy/prod/secrets/sender_server.txt - echo "${{ secrets.PROD_SENDER_SERVER }}" >> ${{ github.workspace }}/deploy/prod/secrets/sender_server.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/sender_server.txt - touch ${{ github.workspace }}/deploy/prod/secrets/sender_port.txt - echo "${{ secrets.PROD_SENDER_PORT }}" >> ${{ github.workspace }}/deploy/prod/secrets/sender_port.txt - chmod 400 ${{ 
github.workspace }}/deploy/prod/secrets/sender_port.txt - touch ${{ github.workspace }}/deploy/prod/secrets/sender_replyto.txt - echo "${{ secrets.PROD_SENDER_REPLYTO }}" >> ${{ github.workspace }}/deploy/prod/secrets/sender_replyto.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/sender_replyto.txt - touch ${{ github.workspace }}/deploy/prod/secrets/sender_user.txt - echo "${{ secrets.PROD_SENDER_USER }}" >> ${{ github.workspace }}/deploy/prod/secrets/sender_user.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/sender_user.txt - touch ${{ github.workspace }}/deploy/prod/secrets/sender_pw.txt - echo "${{ secrets.PROD_SENDER_PW }}" >> ${{ github.workspace }}/deploy/prod/secrets/sender_pw.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/sender_pw.txt - touch ${{ github.workspace }}/deploy/prod/secrets/flask_uploader_app_secret_key.txt - echo "${{ secrets.PROD_FLASK_UPLOADER_APP_SECRET_KEY }}" >> ${{ github.workspace }}/deploy/prod/secrets/flask_uploader_app_secret_key.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/flask_uploader_app_secret_key.txt - touch ${{ github.workspace }}/deploy/prod/secrets/uploader_salt.txt - echo "${{ secrets.PROD_UPLOADER_SALT }}" >> ${{ github.workspace }}/deploy/prod/secrets/uploader_salt.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/uploader_salt.txt - touch ${{ github.workspace }}/deploy/prod/secrets/openai_api_key.txt - echo "${{ secrets.OPENAI_API_KEY }}" >> ${{ github.workspace }}/deploy/prod/secrets/openai_api_key.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/openai_api_key.txt - touch ${{ github.workspace }}/deploy/prod/secrets/hf_token.txt - echo "${{ secrets.HF_TOKEN }}" >> ${{ github.workspace }}/deploy/prod/secrets/hf_token.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/hf_token.txt - touch ${{ github.workspace }}/deploy/prod/secrets/pg_password.txt - echo "${{ secrets.PROD_PG_PASSWORD }}" >> ${{ github.workspace 
}}/deploy/prod/secrets/pg_password.txt - chmod 400 ${{ github.workspace }}/deploy/prod/secrets/pg_password.txt + workspace=${{ github.workspace }} + env="prod" + sed -i "s/WORKSPACE/${workspace//\//\\/}/" ${workspace}/deploy/create_secret.sh + sed -i "s/ENV/${env}/" ${workspace}/deploy/create_secret.sh + /bin/bash ${workspace}/deploy/create_secret.sh imap_user.txt ${{ secrets.PROD_IMAP_USER }} + /bin/bash ${workspace}/deploy/create_secret.sh imap_pw.txt ${{ secrets.PROD_IMAP_PW }} + /bin/bash ${workspace}/deploy/create_secret.sh cleo_url.txt ${{ secrets.PROD_CLEO_URL }} + /bin/bash ${workspace}/deploy/create_secret.sh cleo_user.txt ${{ secrets.PROD_CLEO_USER }} + /bin/bash ${workspace}/deploy/create_secret.sh cleo_pw.txt ${{ secrets.PROD_CLEO_PW }} + /bin/bash ${workspace}/deploy/create_secret.sh cleo_project.txt ${{ secrets.PROD_CLEO_PROJECT }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_server.txt ${{ secrets.PROD_SENDER_SERVER }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_port.txt ${{ secrets.PROD_SENDER_PORT }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_replyto.txt ${{ secrets.PROD_SENDER_REPLYTO }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_user.txt ${{ secrets.PROD_SENDER_USER }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_pw.txt ${{ secrets.PROD_SENDER_PW }} + /bin/bash ${workspace}/deploy/create_secret.sh flask_uploader_app_secret_key.txt ${{ secrets.PROD_FLASK_UPLOADER_APP_SECRET_KEY }} + /bin/bash ${workspace}/deploy/create_secret.sh uploader_salt.txt ${{ secrets.PROD_UPLOADER_SALT }} + /bin/bash ${workspace}/deploy/create_secret.sh openai_api_key.txt ${{ secrets.OPENAI_API_KEY }} + /bin/bash ${workspace}/deploy/create_secret.sh hf_token.txt ${{ secrets.HF_TOKEN }} + /bin/bash ${workspace}/deploy/create_secret.sh pg_password.txt ${{ secrets.PROD_PG_PASSWORD }} + /bin/bash ${workspace}/deploy/create_secret.sh grafana_password.txt ${{ secrets.PROD_GRAFANA_PG_PASSWORD }} # create env file to set 
tag(s) for docker-compose - name: Create Env File @@ -87,6 +60,11 @@ jobs: export tag="${tag//\//-}.${GITHUB_SHA}" echo "TAG=${tag}" >> ${{ github.workspace }}/deploy/prod/.env + # create deployment directory if it doesn't already exist + - name: Create Directory + run: | + ssh submit-t3desk "mkdir -p ~/A2rchi-prod/" + # stop any existing docker compose that's running - name: Stop Docker Compose run: | diff --git a/.github/workflows/prod-meta-ci-cd.yaml b/.github/workflows/prod-meta-ci-cd.yaml index 9edd0b5c..63607982 100644 --- a/.github/workflows/prod-meta-ci-cd.yaml +++ b/.github/workflows/prod-meta-ci-cd.yaml @@ -31,45 +31,25 @@ jobs: - name: Create Secrets Files run: | mkdir -p ${{ github.workspace }}/deploy/prod-meta/secrets/ - touch ${{ github.workspace }}/deploy/prod-meta/secrets/imap_user.txt - echo "${{ secrets.PROD_META_IMAP_USER }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/imap_user.txt - chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/imap_user.txt - touch ${{ github.workspace }}/deploy/prod-meta/secrets/imap_pw.txt - echo "${{ secrets.PROD_IMAP_PW }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/imap_pw.txt - chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/imap_pw.txt - touch ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_url.txt - echo "${{ secrets.PROD_CLEO_URL }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_url.txt - chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_url.txt - touch ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_user.txt - echo "${{ secrets.PROD_CLEO_USER }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_user.txt - chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_user.txt - touch ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_pw.txt - echo "${{ secrets.PROD_CLEO_PW }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_pw.txt - chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_pw.txt - touch ${{ 
github.workspace }}/deploy/prod-meta/secrets/cleo_project.txt - echo "${{ secrets.PROD_META_CLEO_PROJECT }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_project.txt - chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_project.txt - touch ${{ github.workspace }}/deploy/prod-meta/secrets/sender_server.txt - echo "${{ secrets.PROD_SENDER_SERVER }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/sender_server.txt - chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/sender_server.txt - touch ${{ github.workspace }}/deploy/prod-meta/secrets/sender_port.txt - echo "${{ secrets.PROD_SENDER_PORT }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/sender_port.txt - chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/sender_port.txt - touch ${{ github.workspace }}/deploy/prod-meta/secrets/sender_replyto.txt - echo "${{ secrets.PROD_META_SENDER_REPLYTO }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/sender_replyto.txt - chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/sender_replyto.txt - touch ${{ github.workspace }}/deploy/prod-meta/secrets/sender_user.txt - echo "${{ secrets.PROD_SENDER_USER }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/sender_user.txt - chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/sender_user.txt - touch ${{ github.workspace }}/deploy/prod-meta/secrets/sender_pw.txt - echo "${{ secrets.PROD_SENDER_PW }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/sender_pw.txt - chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/sender_pw.txt - touch ${{ github.workspace }}/deploy/prod-meta/secrets/openai_api_key.txt - echo "${{ secrets.OPENAI_API_KEY }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/openai_api_key.txt - chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/openai_api_key.txt - touch ${{ github.workspace }}/deploy/prod-meta/secrets/hf_token.txt - echo "${{ secrets.HF_TOKEN }}" >> ${{ github.workspace 
}}/deploy/prod-meta/secrets/hf_token.txt - chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/hf_token.txt + workspace=${{ github.workspace }} + env="prod-meta" + sed -i "s/WORKSPACE/${workspace//\//\\/}/" ${workspace}/deploy/create_secret.sh + sed -i "s/ENV/${env}/" ${workspace}/deploy/create_secret.sh + /bin/bash ${workspace}/deploy/create_secret.sh imap_user.txt ${{ secrets.PROD_META_IMAP_USER }} + /bin/bash ${workspace}/deploy/create_secret.sh imap_pw.txt ${{ secrets.PROD_IMAP_PW }} + /bin/bash ${workspace}/deploy/create_secret.sh cleo_url.txt ${{ secrets.PROD_CLEO_URL }} + /bin/bash ${workspace}/deploy/create_secret.sh cleo_user.txt ${{ secrets.PROD_CLEO_USER }} + /bin/bash ${workspace}/deploy/create_secret.sh cleo_pw.txt ${{ secrets.PROD_CLEO_PW }} + /bin/bash ${workspace}/deploy/create_secret.sh cleo_project.txt ${{ secrets.PROD_META_CLEO_PROJECT }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_server.txt ${{ secrets.PROD_SENDER_SERVER }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_port.txt ${{ secrets.PROD_SENDER_PORT }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_replyto.txt ${{ secrets.PROD_META_SENDER_REPLYTO }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_user.txt ${{ secrets.PROD_SENDER_USER }} + /bin/bash ${workspace}/deploy/create_secret.sh sender_pw.txt ${{ secrets.PROD_SENDER_PW }} + /bin/bash ${workspace}/deploy/create_secret.sh flask_uploader_app_secret_key.txt ${{ secrets.PROD_FLASK_UPLOADER_APP_SECRET_KEY }} + /bin/bash ${workspace}/deploy/create_secret.sh uploader_salt.txt ${{ secrets.PROD_UPLOADER_SALT }} + /bin/bash ${workspace}/deploy/create_secret.sh openai_api_key.txt ${{ secrets.OPENAI_API_KEY }} + /bin/bash ${workspace}/deploy/create_secret.sh hf_token.txt ${{ secrets.HF_TOKEN }} # create env file to set tag(s) for docker-compose - name: Create Env File @@ -78,7 +58,12 @@ jobs: export tag="${GITHUB_REF#refs/heads/}" export tag="${tag//\//-}.${GITHUB_SHA}" echo "TAG=${tag}" >> ${{ 
github.workspace }}/deploy/prod-meta/.env - + + # create deployment directory if it doesn't already exist + - name: Create Directory + run: | + ssh submit06 "mkdir -p ~/A2rchi-prod-meta/" + # stop any existing docker compose that's running - name: Stop Docker Compose run: | diff --git a/.github/workflows/prod-root-ci-cd.yaml b/.github/workflows/prod-root-ci-cd.yaml index ba4d87e6..a9b3065c 100644 --- a/.github/workflows/prod-root-ci-cd.yaml +++ b/.github/workflows/prod-root-ci-cd.yaml @@ -30,21 +30,16 @@ jobs: - name: Create Secrets Files run: | mkdir -p ${{ github.workspace }}/deploy/prod-root/secrets/ - touch ${{ github.workspace }}/deploy/prod-root/secrets/flask_uploader_app_secret_key.txt - echo "${{ secrets.PROD_FLASK_UPLOADER_APP_SECRET_KEY }}" >> ${{ github.workspace }}/deploy/prod-root/secrets/flask_uploader_app_secret_key.txt - chmod 400 ${{ github.workspace }}/deploy/prod-root/secrets/flask_uploader_app_secret_key.txt - touch ${{ github.workspace }}/deploy/prod-root/secrets/uploader_salt.txt - echo "${{ secrets.PROD_UPLOADER_SALT }}" >> ${{ github.workspace }}/deploy/prod-root/secrets/uploader_salt.txt - chmod 400 ${{ github.workspace }}/deploy/prod-root/secrets/uploader_salt.txt - touch ${{ github.workspace }}/deploy/prod-root/secrets/openai_api_key.txt - echo "${{ secrets.OPENAI_API_KEY }}" >> ${{ github.workspace }}/deploy/prod-root/secrets/openai_api_key.txt - chmod 400 ${{ github.workspace }}/deploy/prod-root/secrets/openai_api_key.txt - touch ${{ github.workspace }}/deploy/prod-root/secrets/hf_token.txt - echo "${{ secrets.HF_TOKEN }}" >> ${{ github.workspace }}/deploy/prod-root/secrets/hf_token.txt - chmod 400 ${{ github.workspace }}/deploy/prod-root/secrets/hf_token.txt - touch ${{ github.workspace }}/deploy/prod-root/secrets/pg_password.txt - echo "${{ secrets.PROD_ROOT_PG_PASSWORD }}" >> ${{ github.workspace }}/deploy/prod-root/secrets/pg_password.txt - chmod 400 ${{ github.workspace }}/deploy/prod-root/secrets/pg_password.txt + workspace=${{ 
github.workspace }} + env="prod-root" + sed -i "s/WORKSPACE/${workspace//\//\\/}/" ${workspace}/deploy/create_secret.sh + sed -i "s/ENV/${env}/" ${workspace}/deploy/create_secret.sh + /bin/bash ${workspace}/deploy/create_secret.sh flask_uploader_app_secret_key.txt ${{ secrets.PROD_FLASK_UPLOADER_APP_SECRET_KEY }} + /bin/bash ${workspace}/deploy/create_secret.sh uploader_salt.txt ${{ secrets.PROD_UPLOADER_SALT }} + /bin/bash ${workspace}/deploy/create_secret.sh openai_api_key.txt ${{ secrets.OPENAI_API_KEY }} + /bin/bash ${workspace}/deploy/create_secret.sh hf_token.txt ${{ secrets.HF_TOKEN }} + /bin/bash ${workspace}/deploy/create_secret.sh pg_password.txt ${{ secrets.PROD_ROOT_PG_PASSWORD }} + /bin/bash ${workspace}/deploy/create_secret.sh grafana_password.txt ${{ secrets.PROD_ROOT_GRAFANA_PG_PASSWORD }} # create env file to set tag(s) for docker-compose - name: Create Env File @@ -54,6 +49,11 @@ jobs: export tag="${tag//\//-}.${GITHUB_SHA}" echo "TAG=${tag}" >> ${{ github.workspace }}/deploy/prod-root/.env + # create deployment directory if it doesn't already exist + - name: Create Directory + run: | + ssh submit06 "mkdir -p ~/A2rchi-prod-root/" + # stop any existing docker compose that's running - name: Stop Docker Compose run: | diff --git a/.gitignore b/.gitignore index 2eaa0d0e..3d9e7a67 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ venv *sqlite_db .vscode 801-content/ +*secrets/ logos/ diff --git a/A2rchi/utils/sql.py b/A2rchi/utils/sql.py deleted file mode 100644 index c2b780db..00000000 --- a/A2rchi/utils/sql.py +++ /dev/null @@ -1,6 +0,0 @@ -"""SQL queries used by A2rchi""" -SQL_INSERT_CONVO = "INSERT INTO conversations (conversation_id, sender, content, ts) VALUES %s RETURNING message_id;" - -SQL_INSERT_FEEDBACK = "INSERT INTO feedback (mid, feedback_ts, feedback, feedback_msg, incorrect, unhelpful, inappropriate) VALUES (%s, %s, %s, %s, %s, %s, %s);" - -SQL_QUERY_CONVO = "SELECT sender, content FROM conversations WHERE conversation_id = %s 
ORDER BY message_id ASC;" diff --git a/SETUP.md b/SETUP.md new file mode 100644 index 00000000..dd51f034 --- /dev/null +++ b/SETUP.md @@ -0,0 +1,62 @@ +## Getting Started with A2rchi +### System Requirements +These instructions were validated on an AWS EC2 c5d.large (2 vCPU, 4GB memory, 20 GiB NVMe SSD) running an Ubuntu 22.04 AMI. + +### Install Docker and Docker-Compose +First, if your system does not already have `docker`, please install `docker` and `docker-compose` (see below if the last command fails): +``` +$ sudo apt-get update +$ sudo apt-get install docker.io +$ sudo apt-get install docker-compose-plugin +``` +If the latter command fails, you can install the compose plugin manually by executing the following: +``` +$ DOCKER_CONFIG=${DOCKER_CONFIG:-$HOME/.docker} +$ mkdir -p $DOCKER_CONFIG/cli-plugins +$ curl -SL https://github.com/docker/compose/releases/download/v2.23.0/docker-compose-linux-x86_64 -o $DOCKER_CONFIG/cli-plugins/docker-compose +$ chmod +x $DOCKER_CONFIG/cli-plugins/docker-compose +``` +Please see the following [documentation](https://docs.docker.com/compose/install/linux/#install-using-the-repository) for more details if you would like to install a different version of Compose or if you want to install it for all users. + +### Create Docker Group +If your system already has `docker` --- and you can run `docker` without using `sudo` --- then you can skip this step. Otherwise, you will need to create a docker group and add your user to it so that our installation script can run without assuming `sudo`. 
+ +First, create the docker group as follows: +``` +$ sudo groupadd docker +``` +Next, add your user to the docker group: +``` +$ sudo usermod -aG docker $USER +``` +You can then activate this change by either logging out and logging back in to your system, or by executing: +``` +$ newgrp docker +``` +For more details, please see the following [documentation](https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user) + +### Set Variables and Run Installation +Once `docker` and `docker-compose` are installed, you'll then need to provide your OpenAI API key and HuggingFace token to the services by placing them in files which will be used by `docker-compose`: +``` +$ mkdir deploy/vanilla/secrets +$ echo $OPENAI_API_KEY > deploy/vanilla/secrets/openai_api_key.txt +$ echo $HF_TOKEN > deploy/vanilla/secrets/hf_token.txt +``` +Compose will place these files into a special `/run/secrets` directory inside of the containers so that these values can be accessed at runtime. + +Now you can run your installation by executing the following from the root of the A2rchi repository: +``` +$ ./deploy/vanilla/install.sh +``` + +After `docker` builds and starts its images, you should eventually see something like the following: +``` +[+] Running 3/3 + ✔ Network vanilla_default Created + ✔ Container vanilla-chromadb-1 Healthy + ✔ Container vanilla-chat-1 Started +``` +Note that the `vanilla-chat-1` container will wait for the `vanilla-chromadb-1` container to become healthy before starting, in a process that should take ~10s after the latter's creation. 
+ +- you can now access the chat app by visiting (host:port) +- you can upload documents by doing xyz \ No newline at end of file diff --git a/A2rchi/__init__.py b/a2rchi/__init__.py similarity index 100% rename from A2rchi/__init__.py rename to a2rchi/__init__.py diff --git a/A2rchi/bin/__init__.py b/a2rchi/bin/__init__.py similarity index 100% rename from A2rchi/bin/__init__.py rename to a2rchi/bin/__init__.py diff --git a/A2rchi/bin/service_chat.py b/a2rchi/bin/service_chat.py similarity index 90% rename from A2rchi/bin/service_chat.py rename to a2rchi/bin/service_chat.py index 039a4667..08b2e0b0 100644 --- a/A2rchi/bin/service_chat.py +++ b/a2rchi/bin/service_chat.py @@ -1,7 +1,7 @@ #!/bin/python -from A2rchi.interfaces.chat_app.app import FlaskAppWrapper -from A2rchi.utils.config_loader import Config_Loader -from A2rchi.utils.env import read_secret +from a2rchi.interfaces.chat_app.app import FlaskAppWrapper +from a2rchi.utils.config_loader import Config_Loader +from a2rchi.utils.env import read_secret from flask import Flask diff --git a/A2rchi/bin/service_cleo.py b/a2rchi/bin/service_cleo.py similarity index 59% rename from A2rchi/bin/service_cleo.py rename to a2rchi/bin/service_cleo.py index c2525e58..759e9965 100755 --- a/A2rchi/bin/service_cleo.py +++ b/a2rchi/bin/service_cleo.py @@ -1,7 +1,7 @@ #!/bin/python -from A2rchi.interfaces import cleo -from A2rchi.utils.config_loader import Config_Loader -from A2rchi.utils.env import read_secret +from a2rchi.interfaces import cleo +from a2rchi.utils.config_loader import Config_Loader +from a2rchi.utils.env import read_secret import os import time @@ -9,8 +9,13 @@ # set openai os.environ['OPENAI_API_KEY'] = read_secret("OPENAI_API_KEY") os.environ['HUGGING_FACE_HUB_TOKEN'] = read_secret("HUGGING_FACE_HUB_TOKEN") -print("Starting Cleo Service") +# temporary hack to prevent cleo, mailbox, and chat services from all +# starting DataManager at the same time; eventually replace this with +# more robust solution 
+time.sleep(30) + +print("Starting Cleo Service") config = Config_Loader().config["utils"] cleo = cleo.Cleo('Cleo_Helpdesk') diff --git a/A2rchi/bin/service_create_account.py b/a2rchi/bin/service_create_account.py similarity index 83% rename from A2rchi/bin/service_create_account.py rename to a2rchi/bin/service_create_account.py index 390502e6..5302e20c 100755 --- a/A2rchi/bin/service_create_account.py +++ b/a2rchi/bin/service_create_account.py @@ -1,7 +1,7 @@ #!/bin/python -from A2rchi.utils.config_loader import Config_Loader -from A2rchi.utils.env import read_secret -from A2rchi.interfaces.uploader_app.app import add_username_password +from a2rchi.utils.config_loader import Config_Loader +from a2rchi.utils.env import read_secret +from a2rchi.interfaces.uploader_app.app import add_username_password import getpass import os diff --git a/A2rchi/bin/service_mailbox.py b/a2rchi/bin/service_mailbox.py similarity index 61% rename from A2rchi/bin/service_mailbox.py rename to a2rchi/bin/service_mailbox.py index cbcd90ab..ef7b89ee 100755 --- a/A2rchi/bin/service_mailbox.py +++ b/a2rchi/bin/service_mailbox.py @@ -1,8 +1,8 @@ #!/bin/python -from A2rchi.interfaces import cleo -from A2rchi.utils import mailbox -from A2rchi.utils.config_loader import Config_Loader -from A2rchi.utils.env import read_secret +from a2rchi.interfaces import cleo +from a2rchi.utils import mailbox +from a2rchi.utils.config_loader import Config_Loader +from a2rchi.utils.env import read_secret import os import time @@ -13,8 +13,12 @@ user = read_secret('IMAP_USER') password = read_secret('IMAP_PW') -print("Starting Mailbox Service") +# temporary hack to prevent cleo, mailbox, and chat services from all +# starting DataManager at the same time; eventually replace this with +# more robust solution +time.sleep(60) +print("Starting Mailbox Service") config = Config_Loader().config["utils"] cleo = cleo.Cleo('Cleo_Helpdesk') diff --git a/a2rchi/bin/service_piazza.py b/a2rchi/bin/service_piazza.py new file 
mode 100644 index 00000000..667a75c0 --- /dev/null +++ b/a2rchi/bin/service_piazza.py @@ -0,0 +1,152 @@ +#!/bin/python +from a2rchi.chains.chain import Chain +from a2rchi.interfaces.uploader_app.app import FlaskAppWrapper +from a2rchi.utils.config_loader import Config_Loader +from a2rchi.utils.data_manager import DataManager +from a2rchi.utils.env import read_secret +from a2rchi.utils.scraper import Scraper + +from flask import Flask +from piazza_api import Piazza +from threading import Thread + +import json +import os +import requests +import time + +# DEFINITIONS +SLACK_HEADERS = {'content-type': 'application/json'} +MIN_NEXT_POST_FILE = "/root/data/min_next_post.json" + +# set openai +os.environ['OPENAI_API_KEY'] = read_secret("OPENAI_API_KEY") +os.environ['HUGGING_FACE_HUB_TOKEN'] = read_secret("HUGGING_FACE_HUB_TOKEN") +slack_url = read_secret("SLACK_WEBHOOK") +piazza_email = read_secret("PIAZZA_EMAIL") +piazza_password = read_secret("PIAZZA_PASSWORD") +piazza_config = Config_Loader().config["utils"].get("piazza", None) + +# scrape data onto the filesystem +scraper = Scraper() +scraper.hard_scrape(verbose=True) +# unresolved_posts = scraper.piazza_scrape(verbose=True) + +# update vector store +data_manager = DataManager() +data_manager.update_vectorstore() + +# go through unresolved posts and suggest answers + +# from this point on; filter feed for new posts and propose answers + +# ^also filter for new posts that have been resolved and add to vector store + +# for now, just iter through all posts and send replies for unresolved + + +# login to piazza +piazza = Piazza() +piazza.user_login(email=piazza_email, password=piazza_password) +piazza_net = piazza.network(piazza_config["network_id"]) + +# create chain +a2rchi_chain = Chain() + +def call_chain(chain, post): + # convert post --> history + post_str = "SUBJECT: " + post['history'][-1]['subject'] + "\n\nCONTENT: " + post['history'][-1]['content'] + history = [("User", post_str)] + + return 
chain(history)['answer'], post_str + + +def write_min_next_post(post_nr): + with open(MIN_NEXT_POST_FILE, 'w') as f: + json.dump({"min_next_post_nr": post_nr}, f) + + +def read_min_next_post(): + with open(MIN_NEXT_POST_FILE, 'r') as f: + min_next_post_data = json.load(f) + + return int(min_next_post_data['min_next_post_nr']) + +# # get generator for all posts +# max_post_nr = 0 +# posts = piazza_net.iter_all_posts(sleep=1.5) +# for idx, post in enumerate(posts): +# # update highest post # seen +# max_post_nr = max(post['nr'], max_post_nr) + +# # if post has no answer or an unresolved followup, send to A2rchi +# if post.get("no_answer", False): # or post.get("no_answer_followup", False) +# print(f"{idx} PROCESSING POST: {post['nr']}") + +# # generate response +# response, post_str = call_chain(a2rchi_chain, post) +# response = f"====================\nReplying to Post @{post['nr']}\n==========\n\n{post_str}\n==========\n\nA2RCHI RESPONSE: {response}\n====================\n" + +# # send response to Slack +# r = requests.post(slack_url, data=json.dumps({"text": response}), headers=SLACK_HEADERS) +# print(r) + +# else: +# print(f"{idx} skipping post: {post['nr']}") + +# continuously poll for next post +# min_next_post_nr = max_post_nr + 1 + +# write min next post number if we're initializing for the first time +if not os.path.isfile(MIN_NEXT_POST_FILE): + print("WRITING INITIAL MIN. 
NEXT POST") + write_min_next_post(584) + +# read min next post number +min_next_post_nr = read_min_next_post() + +while True: + try: + # get new post(s) and sort them by 'nr' + feed = piazza_net.get_feed(limit=999999, offset=0) + post_nrs = sorted(list(map(lambda post: post['nr'], feed['feed']))) + largest_post_nr = post_nrs[-1] + except Exception as e: + print("ERROR - Failed to parse feed due to the following exception:") + print(str(e)) + time.sleep(60) + continue + + # keep processing posts >= min_next_post_nr + while len(post_nrs) > 0: + # get next post number + post_nr = post_nrs.pop(-1) + + # stop if we've already processed it + if post_nr < min_next_post_nr: + break + + try: + # otherwise, process it + post = piazza_net.get_post(post_nr) + + # if successful, send to A2rchi + print(f"PROCESSING NEW POST: {post_nr}") + response, post_str = call_chain(a2rchi_chain, post) + response = f"====================\nReplying to Post @{post['nr']}\n==========\n\n{post_str}\n==========\n\nA2RCHI RESPONSE: {response}\n====================\n" + + # send response to Slack + r = requests.post(slack_url, data=json.dumps({"text": response}), headers=SLACK_HEADERS) + print(r) + except Exception as e: + print(f"ERROR - Failed to process post {post_nr} due to the following exception:") + print(str(e)) + + # set min. 
next post to be one greater than max we just saw + min_next_post_nr = largest_post_nr + 1 + + # write min_next_post_nr so we don't start over on restart + write_min_next_post(min_next_post_nr) + + # sleep for 60s + time.sleep(60) diff --git a/A2rchi/bin/service_uploader.py b/a2rchi/bin/service_uploader.py similarity index 88% rename from A2rchi/bin/service_uploader.py rename to a2rchi/bin/service_uploader.py index 7342bd5b..4219953d 100755 --- a/A2rchi/bin/service_uploader.py +++ b/a2rchi/bin/service_uploader.py @@ -1,9 +1,9 @@ #!/bin/python -from A2rchi.interfaces.uploader_app.app import FlaskAppWrapper -from A2rchi.utils.config_loader import Config_Loader -from A2rchi.utils.data_manager import DataManager -from A2rchi.utils.env import read_secret -from A2rchi.utils.scraper import Scraper +from a2rchi.interfaces.uploader_app.app import FlaskAppWrapper +from a2rchi.utils.config_loader import Config_Loader +from a2rchi.utils.data_manager import DataManager +from a2rchi.utils.env import read_secret +from a2rchi.utils.scraper import Scraper from flask import Flask from threading import Thread diff --git a/A2rchi/chains/__init__.py b/a2rchi/chains/__init__.py similarity index 100% rename from A2rchi/chains/__init__.py rename to a2rchi/chains/__init__.py diff --git a/A2rchi/chains/base.py b/a2rchi/chains/base.py similarity index 97% rename from A2rchi/chains/base.py rename to a2rchi/chains/base.py index c9f9d43c..f5b67d99 100644 --- a/A2rchi/chains/base.py +++ b/a2rchi/chains/base.py @@ -3,8 +3,8 @@ from loguru import logger from langchain.callbacks import FileCallbackHandler -from A2rchi.chains.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT -from A2rchi.utils.config_loader import Config_Loader +from a2rchi.chains.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT +from a2rchi.utils.config_loader import Config_Loader from langchain.base_language import BaseLanguageModel from langchain.chains.combine_documents.stuff import StuffDocumentsChain diff --git 
a/A2rchi/chains/chain.py b/a2rchi/chains/chain.py similarity index 97% rename from A2rchi/chains/chain.py rename to a2rchi/chains/chain.py index c2ea02a6..8ab6ee90 100644 --- a/A2rchi/chains/chain.py +++ b/a2rchi/chains/chain.py @@ -1,4 +1,4 @@ -from A2rchi.chains.base import BaseSubMITChain as BaseChain +from a2rchi.chains.base import BaseSubMITChain as BaseChain from chromadb.config import Settings from langchain.vectorstores import Chroma @@ -17,7 +17,7 @@ def __init__(self): """ self.kill = False - from A2rchi.utils.config_loader import Config_Loader + from a2rchi.utils.config_loader import Config_Loader self.config = Config_Loader().config["chains"]["chain"] self.global_config = Config_Loader().config["global"] self.utils_config = Config_Loader().config["utils"] diff --git a/A2rchi/chains/models.py b/a2rchi/chains/models.py similarity index 100% rename from A2rchi/chains/models.py rename to a2rchi/chains/models.py diff --git a/A2rchi/chains/prompts.py b/a2rchi/chains/prompts.py similarity index 96% rename from A2rchi/chains/prompts.py rename to a2rchi/chains/prompts.py index cfc5ef93..e4c1d76e 100644 --- a/A2rchi/chains/prompts.py +++ b/a2rchi/chains/prompts.py @@ -1,6 +1,6 @@ # flake8: noqa from langchain.prompts.prompt import PromptTemplate -from A2rchi.utils.config_loader import Config_Loader +from a2rchi.utils.config_loader import Config_Loader config = Config_Loader().config["chains"]["prompts"] diff --git a/A2rchi/interfaces/__init__.py b/a2rchi/interfaces/__init__.py similarity index 100% rename from A2rchi/interfaces/__init__.py rename to a2rchi/interfaces/__init__.py diff --git a/A2rchi/interfaces/chat_app/__init__.py b/a2rchi/interfaces/chat_app/__init__.py similarity index 100% rename from A2rchi/interfaces/chat_app/__init__.py rename to a2rchi/interfaces/chat_app/__init__.py diff --git a/A2rchi/interfaces/chat_app/app.py b/a2rchi/interfaces/chat_app/app.py similarity index 76% rename from A2rchi/interfaces/chat_app/app.py rename to 
a2rchi/interfaces/chat_app/app.py index 04be7bc4..8a021e43 100644 --- a/A2rchi/interfaces/chat_app/app.py +++ b/a2rchi/interfaces/chat_app/app.py @@ -1,8 +1,8 @@ -from A2rchi.chains.chain import Chain -from A2rchi.utils.config_loader import Config_Loader -from A2rchi.utils.data_manager import DataManager -from A2rchi.utils.env import read_secret -from A2rchi.utils.sql import SQL_INSERT_CONVO, SQL_INSERT_FEEDBACK, SQL_QUERY_CONVO +from a2rchi.chains.chain import Chain +from a2rchi.utils.config_loader import Config_Loader +from a2rchi.utils.data_manager import DataManager +from a2rchi.utils.env import read_secret +from a2rchi.utils.sql import SQL_INSERT_CONVO, SQL_INSERT_FEEDBACK, SQL_INSERT_TIMING, SQL_QUERY_CONVO from datetime import datetime from pygments import highlight @@ -36,7 +36,7 @@ import yaml # DEFINITIONS -QUERY_LIMIT = 10000 # max number of queries per conversation +QUERY_LIMIT = 10000 # max queries per conversation class AnswerRenderer(mt.HTMLRenderer): @@ -148,6 +148,7 @@ def format_code_in_text(text): def insert_feedback(self, feedback): """ + Insert feedback from user for specific message into feedback table. """ # construct insert_tup (mid, feedback_ts, feedback, feedback_msg, incorrect, unhelpful, inappropriate) insert_tup = ( @@ -199,7 +200,7 @@ def insert_conversation(self, conversation_id, user_message, a2rchi_message, is_ """ print(" INFO - entered insert_conversation.") - # parse user message / a2rchi message if not None + # parse user message / a2rchi message user_sender, user_content, user_msg_ts = user_message a2rchi_sender, a2rchi_content, a2rchi_msg_ts = a2rchi_message @@ -229,20 +230,62 @@ def insert_conversation(self, conversation_id, user_message, a2rchi_message, is_ self.cursor, self.conn = None, None return message_ids + + def insert_timing(self, message_id, timestamps): + """ + Store timing info to understand response profile. 
+ """ + print(" INFO - entered insert_timing.") + + # construct insert_tup + insert_tup = ( + message_id, + timestamps['client_sent_msg_ts'], + timestamps['server_received_msg_ts'], + timestamps['lock_acquisition_ts'], + timestamps['vectorstore_update_ts'], + timestamps['query_convo_history_ts'], + timestamps['chain_finished_ts'], + timestamps['similarity_search_ts'], + timestamps['a2rchi_message_ts'], + timestamps['insert_convo_ts'], + timestamps['finish_call_ts'], + timestamps['server_response_msg_ts'], + timestamps['server_response_msg_ts'] - timestamps['server_received_msg_ts'] + ) + + # create connection to database + self.conn = psycopg2.connect(**self.pg_config) + self.cursor = self.conn.cursor() + self.cursor.execute(SQL_INSERT_TIMING, insert_tup) + self.conn.commit() + + # clean up database connection state + self.cursor.close() + self.conn.close() + self.cursor, self.conn = None, None - def __call__(self, message: List[str], conversation_id: int, is_refresh: bool, msg_ts: datetime): + def __call__(self, message: List[str], conversation_id: int, is_refresh: bool, server_received_msg_ts: datetime, client_sent_msg_ts: float, client_timeout: float): """ Execute the chat functionality. 
""" + # store timestamps for code profiling information + timestamps = {} + self.lock.acquire() + timestamps['lock_acquisition_ts'] = datetime.now() try: # update vector store through data manager; will only do something if new files have been added print("INFO - acquired lock file update vectorstore") self.data_manager.update_vectorstore() + timestamps['vectorstore_update_ts'] = datetime.now() except Exception as e: + # NOTE: we log the error message but do not return here, as a failure + # to update the data manager does not necessarily mean A2rchi cannot + # process and respond to the message print(f"ERROR - {str(e)}") finally: @@ -259,20 +302,27 @@ def __call__(self, message: List[str], conversation_id: int, is_refresh: bool, m # fetch history given conversation_id history = self.query_conversation_history(conversation_id) + timestamps['query_convo_history_ts'] = datetime.now() # if this is a chat refresh / message regeneration; remove previous contiguous non-A2rchi message(s) if is_refresh: while history[-1][0] == "A2rchi": _ = history.pop(-1) + # guard call to LLM; if timestamp from message is more than timeout secs in the past; + # return error=True and do not generate response as the client will have timed out + if server_received_msg_ts.timestamp() - client_sent_msg_ts > client_timeout: + return None, None, None, timestamps, 408 + # run chain to get result; limit users to 1000 queries per conversation; refreshing browser starts new conversation if len(history) < QUERY_LIMIT: full_history = history + [(sender, content)] if not is_refresh else history result = self.chain(full_history) + timestamps['chain_finished_ts'] = datetime.now() else: - # the case where we have exceeded the QUERY LIMIT (built so that we do not overuse the chain) - output = "Sorry, our service is currently down due to exceptional demand. Please come again later." 
- return output, conversation_id + # for now let's return a timeout error, as returning a different + # error message would require handling new message_ids param. properly + return None, None, None, timestamps, 500 # keep track of total number of queries and log this amount self.number_of_queries += 1 @@ -282,6 +332,7 @@ def __call__(self, message: List[str], conversation_id: int, is_refresh: bool, m # - low score means very close (it's a distance between embedding vectors approximated # by an approximate k-nearest neighbors algorithm called HNSW) score = self.chain.similarity_search(content) + timestamps['similarity_search_ts'] = datetime.now() # load the present list of sources try: @@ -306,21 +357,27 @@ def __call__(self, message: List[str], conversation_id: int, is_refresh: bool, m output = "

" + self.format_code_in_text(result["answer"]) + "

" # write user message and A2rchi response to database - user_message = (sender, content, msg_ts) - a2rchi_message = ("A2rchi", output, datetime.now()) + timestamps['a2rchi_message_ts'] = datetime.now() + user_message = (sender, content, server_received_msg_ts) + a2rchi_message = ("A2rchi", output, timestamps['a2rchi_message_ts']) message_ids = self.insert_conversation(conversation_id, user_message, a2rchi_message, is_refresh) + timestamps['insert_convo_ts'] = datetime.now() except Exception as e: + # NOTE: we log the error message and return here print(f"ERROR - {str(e)}") + return None, None, None, timestamps, 500 finally: if self.cursor is not None: self.cursor.close() if self.conn is not None: self.conn.close() + + timestamps['finish_call_ts'] = datetime.now() - return output, conversation_id, message_ids + return output, conversation_id, message_ids, timestamps, None class FlaskAppWrapper(object): @@ -370,18 +427,44 @@ def get_chat_response(self): discussion ID (either None or an integer) """ # compute timestamp at which message was received by server - msg_ts = datetime.now() + server_received_msg_ts = datetime.now() # get user input and conversation_id from the request message = request.json.get('last_message') conversation_id = request.json.get('conversation_id') is_refresh = request.json.get('is_refresh') + client_sent_msg_ts = request.json.get('client_sent_msg_ts') / 1000 + client_timeout = request.json.get('client_timeout') / 1000 # query the chat and return the results. 
print(" INFO - Calling the ChatWrapper()") - response, conversation_id, message_ids = self.chat(message, conversation_id, is_refresh, msg_ts) - - return jsonify({'response': response, 'conversation_id': conversation_id, 'a2rchi_msg_id': message_ids[-1]}) + response, conversation_id, message_ids, timestamps, error_code = self.chat(message, conversation_id, is_refresh, server_received_msg_ts, client_sent_msg_ts, client_timeout) + + # handle errors + if error_code is not None: + output = ( + jsonify({'error': 'client timeout'}) + if error_code == 408 + else jsonify({'error': 'server error; see chat logs for message'}) + ) + return output, error_code + + # compute timestamp at which message was returned to client + timestamps['server_response_msg_ts'] = datetime.now() + + # store timing info for this message + timestamps['server_received_msg_ts'] = server_received_msg_ts + timestamps['client_sent_msg_ts'] = datetime.fromtimestamp(client_sent_msg_ts) + self.chat.insert_timing(message_ids[-1], timestamps) + + # otherwise return A2rchi's response to client + return jsonify({ + 'response': response, + 'conversation_id': conversation_id, + 'a2rchi_msg_id': message_ids[-1], + 'server_response_msg_ts': timestamps['server_response_msg_ts'].timestamp(), + 'final_response_msg_ts': datetime.now().timestamp(), + }) def index(self): return render_template('index.html') diff --git a/A2rchi/interfaces/chat_app/static/images/a2rchi-initial.png b/a2rchi/interfaces/chat_app/static/images/a2rchi-initial.png similarity index 100% rename from A2rchi/interfaces/chat_app/static/images/a2rchi-initial.png rename to a2rchi/interfaces/chat_app/static/images/a2rchi-initial.png diff --git a/A2rchi/interfaces/chat_app/static/images/a2rchi.png b/a2rchi/interfaces/chat_app/static/images/a2rchi.png similarity index 100% rename from A2rchi/interfaces/chat_app/static/images/a2rchi.png rename to a2rchi/interfaces/chat_app/static/images/a2rchi.png diff --git 
a/A2rchi/interfaces/chat_app/static/images/close.png b/a2rchi/interfaces/chat_app/static/images/close.png similarity index 100% rename from A2rchi/interfaces/chat_app/static/images/close.png rename to a2rchi/interfaces/chat_app/static/images/close.png diff --git a/A2rchi/interfaces/chat_app/static/images/refresh.svg b/a2rchi/interfaces/chat_app/static/images/refresh.svg similarity index 100% rename from A2rchi/interfaces/chat_app/static/images/refresh.svg rename to a2rchi/interfaces/chat_app/static/images/refresh.svg diff --git a/A2rchi/interfaces/chat_app/static/images/thumbs_down.png b/a2rchi/interfaces/chat_app/static/images/thumbs_down.png similarity index 100% rename from A2rchi/interfaces/chat_app/static/images/thumbs_down.png rename to a2rchi/interfaces/chat_app/static/images/thumbs_down.png diff --git a/A2rchi/interfaces/chat_app/static/images/thumbs_down.svg b/a2rchi/interfaces/chat_app/static/images/thumbs_down.svg similarity index 100% rename from A2rchi/interfaces/chat_app/static/images/thumbs_down.svg rename to a2rchi/interfaces/chat_app/static/images/thumbs_down.svg diff --git a/A2rchi/interfaces/chat_app/static/images/thumbs_down_filled.png b/a2rchi/interfaces/chat_app/static/images/thumbs_down_filled.png similarity index 100% rename from A2rchi/interfaces/chat_app/static/images/thumbs_down_filled.png rename to a2rchi/interfaces/chat_app/static/images/thumbs_down_filled.png diff --git a/A2rchi/interfaces/chat_app/static/images/thumbs_up.png b/a2rchi/interfaces/chat_app/static/images/thumbs_up.png similarity index 100% rename from A2rchi/interfaces/chat_app/static/images/thumbs_up.png rename to a2rchi/interfaces/chat_app/static/images/thumbs_up.png diff --git a/A2rchi/interfaces/chat_app/static/images/thumbs_up.svg b/a2rchi/interfaces/chat_app/static/images/thumbs_up.svg similarity index 100% rename from A2rchi/interfaces/chat_app/static/images/thumbs_up.svg rename to a2rchi/interfaces/chat_app/static/images/thumbs_up.svg diff --git 
a/A2rchi/interfaces/chat_app/static/images/thumbs_up_filled.png b/a2rchi/interfaces/chat_app/static/images/thumbs_up_filled.png similarity index 100% rename from A2rchi/interfaces/chat_app/static/images/thumbs_up_filled.png rename to a2rchi/interfaces/chat_app/static/images/thumbs_up_filled.png diff --git a/A2rchi/interfaces/chat_app/static/images/user.svg b/a2rchi/interfaces/chat_app/static/images/user.svg similarity index 100% rename from A2rchi/interfaces/chat_app/static/images/user.svg rename to a2rchi/interfaces/chat_app/static/images/user.svg diff --git a/A2rchi/interfaces/chat_app/static/script.js-template b/a2rchi/interfaces/chat_app/static/script.js-template similarity index 87% rename from A2rchi/interfaces/chat_app/static/script.js-template rename to a2rchi/interfaces/chat_app/static/script.js-template index 86743b69..27eb10a8 100644 --- a/A2rchi/interfaces/chat_app/static/script.js-template +++ b/a2rchi/interfaces/chat_app/static/script.js-template @@ -13,12 +13,32 @@ const helpful_checkbox = document.getElementById("helpful_checkbox"); const appropriate_checkbox = document.getElementById("appropriate_checkbox"); popupForm.style.display = "none"; +// DEFINITIONS +let DEFAULT_TIMEOUT_SECS = 120 let userText = null; let conversation_id = null; let conversation = [] let num_responses_since_last_rating = 0; let last_response_is_feedback_request = false; + +async function fetchWithTimeout(resource, options = {}) { + // extracts `timeout` field from options dict; + // will default to DEFAULT_TIMEOUT_SECS if no field is present + const { timeout = DEFAULT_TIMEOUT_SECS * 1000 } = options; + + const controller = new AbortController(); + const id = setTimeout(() => controller.abort(), timeout); + + const response = await fetch(resource, { + ...options, + signal: controller.signal + }); + clearTimeout(id); + + return response; +} + const loadDataFromLocalstorage = () => { // Load saved chats and theme from local storage and apply/add on the page const themeColor = 
localStorage.getItem("themeColor"); @@ -80,12 +100,15 @@ const getChatResponse = async (incomingChatDiv, isRefresh=false) => { last_message: conversation.slice(-1), conversation_id: conversation_id, is_refresh: isRefresh, - }) + client_sent_msg_ts: Date.now(), + client_timeout: DEFAULT_TIMEOUT_SECS * 1000 + }), + timeout: DEFAULT_TIMEOUT_SECS * 1000 } // Send POST request to Flask API, get response and set the response as paragraph element text try { - const response = await (await fetch(API_URL, requestOptions)).json(); + const response = await (await fetchWithTimeout(API_URL, requestOptions)).json(); pElement.innerHTML = response.response; pElement.setAttribute('id', response.a2rchi_msg_id.toString()); pElement.classList.add(".default-text"); @@ -133,16 +156,20 @@ const likeResponse = (likeBtn) => { const API_URL = "http://XX-HOSTNAME-XX:XX-HTTP_PORT-XX/api/like"; // Send an API request with the chat content and discussion ID - fetch(API_URL, { - method: "POST", // You may need to adjust the HTTP method - headers: { - "Content-Type": "application/json", - "Access-Control-Allow-Origin": "*", - }, - body: JSON.stringify({ - message_id: likeBtn.parentElement.previousElementSibling.querySelector("p").parentElement.id, - }), - }) + try { + fetch(API_URL, { + method: "POST", // You may need to adjust the HTTP method + headers: { + "Content-Type": "application/json", + "Access-Control-Allow-Origin": "*", + }, + body: JSON.stringify({ + message_id: likeBtn.parentElement.previousElementSibling.querySelector("p").parentElement.id, + }), + }) + } catch (error) { + console.log("liked error message") + } } const dislikeResponse = (dislikeBtn) => { @@ -165,22 +192,26 @@ const dislikeResponse = (dislikeBtn) => { function handleSubmitToAPI() { const additionalThoughts = additionalThoughtsInput.value; - fetch(API_URL, { - method: "POST", // You may need to adjust the HTTP method - headers: { - "Content-Type": "application/json", - "Access-Control-Allow-Origin": "*", - }, - body: 
JSON.stringify({ - message_id: dislikeBtn.parentElement.previousElementSibling.querySelector("p").parentElement.id, - feedback_msg: additionalThoughts, - incorrect: correct_checkbox.checked, - unhelpful: helpful_checkbox.checked, - inappropriate: appropriate_checkbox.checked, - }), - }); - - //hide pop up formi + try { + fetch(API_URL, { + method: "POST", // You may need to adjust the HTTP method + headers: { + "Content-Type": "application/json", + "Access-Control-Allow-Origin": "*", + }, + body: JSON.stringify({ + message_id: dislikeBtn.parentElement.previousElementSibling.querySelector("p").parentElement.id, + feedback_msg: additionalThoughts, + incorrect: correct_checkbox.checked, + unhelpful: helpful_checkbox.checked, + inappropriate: appropriate_checkbox.checked, + }), + }); + } catch (error) { + console.log("disliked error message") + } + + //hide pop up form popupForm.style.display = "none"; } diff --git a/A2rchi/interfaces/chat_app/static/style.css b/a2rchi/interfaces/chat_app/static/style.css similarity index 99% rename from A2rchi/interfaces/chat_app/static/style.css rename to a2rchi/interfaces/chat_app/static/style.css index 931f370e..23fe7e5d 100644 --- a/A2rchi/interfaces/chat_app/static/style.css +++ b/a2rchi/interfaces/chat_app/static/style.css @@ -168,6 +168,9 @@ span.material-symbols-rounded { .chat .chat-details p.error { color: #e55865; } +.chat .chat-details .error { + padding: 0 50px 0 25px; +} .chat .typing-animation { padding-left: 25px; display: inline-flex; diff --git a/A2rchi/interfaces/chat_app/templates/index.html b/a2rchi/interfaces/chat_app/templates/index.html similarity index 100% rename from A2rchi/interfaces/chat_app/templates/index.html rename to a2rchi/interfaces/chat_app/templates/index.html diff --git a/A2rchi/interfaces/chat_app/templates/terms.html b/a2rchi/interfaces/chat_app/templates/terms.html similarity index 100% rename from A2rchi/interfaces/chat_app/templates/terms.html rename to 
a2rchi/interfaces/chat_app/templates/terms.html diff --git a/A2rchi/interfaces/cleo.py b/a2rchi/interfaces/cleo.py similarity index 98% rename from A2rchi/interfaces/cleo.py rename to a2rchi/interfaces/cleo.py index 41a18960..577f662b 100644 --- a/A2rchi/interfaces/cleo.py +++ b/a2rchi/interfaces/cleo.py @@ -1,7 +1,7 @@ -from A2rchi.chains.chain import Chain -from A2rchi.utils import sender -from A2rchi.utils.data_manager import DataManager -from A2rchi.utils.env import read_secret +from a2rchi.chains.chain import Chain +from a2rchi.utils import sender +from a2rchi.utils.data_manager import DataManager +from a2rchi.utils.env import read_secret from redminelib import Redmine diff --git a/A2rchi/interfaces/uploader_app/__init__.py b/a2rchi/interfaces/uploader_app/__init__.py similarity index 100% rename from A2rchi/interfaces/uploader_app/__init__.py rename to a2rchi/interfaces/uploader_app/__init__.py diff --git a/A2rchi/interfaces/uploader_app/app.py b/a2rchi/interfaces/uploader_app/app.py similarity index 99% rename from A2rchi/interfaces/uploader_app/app.py rename to a2rchi/interfaces/uploader_app/app.py index d2a77b28..bd380c89 100644 --- a/A2rchi/interfaces/uploader_app/app.py +++ b/a2rchi/interfaces/uploader_app/app.py @@ -1,6 +1,6 @@ -from A2rchi.utils.config_loader import Config_Loader -from A2rchi.utils.env import read_secret -from A2rchi.utils.scraper import Scraper +from a2rchi.utils.config_loader import Config_Loader +from a2rchi.utils.env import read_secret +from a2rchi.utils.scraper import Scraper from flask import render_template, request, redirect, url_for, flash, session diff --git a/A2rchi/interfaces/uploader_app/templates/index.html b/a2rchi/interfaces/uploader_app/templates/index.html similarity index 100% rename from A2rchi/interfaces/uploader_app/templates/index.html rename to a2rchi/interfaces/uploader_app/templates/index.html diff --git a/A2rchi/interfaces/uploader_app/templates/login.html b/a2rchi/interfaces/uploader_app/templates/login.html 
similarity index 100% rename from A2rchi/interfaces/uploader_app/templates/login.html rename to a2rchi/interfaces/uploader_app/templates/login.html diff --git a/A2rchi/utils/__init__.py b/a2rchi/utils/__init__.py similarity index 100% rename from A2rchi/utils/__init__.py rename to a2rchi/utils/__init__.py diff --git a/A2rchi/utils/config_loader.py b/a2rchi/utils/config_loader.py similarity index 95% rename from A2rchi/utils/config_loader.py rename to a2rchi/utils/config_loader.py index eb4ec527..7a36f0ac 100644 --- a/A2rchi/utils/config_loader.py +++ b/a2rchi/utils/config_loader.py @@ -1,4 +1,4 @@ -from A2rchi.chains.models import OpenAILLM, DumbLLM, LlamaLLM +from a2rchi.chains.models import OpenAILLM, DumbLLM, LlamaLLM from langchain.embeddings.openai import OpenAIEmbeddings from langchain.embeddings import HuggingFaceEmbeddings diff --git a/A2rchi/utils/data_manager.py b/a2rchi/utils/data_manager.py similarity index 99% rename from A2rchi/utils/data_manager.py rename to a2rchi/utils/data_manager.py index de2b3978..7f4c0667 100644 --- a/A2rchi/utils/data_manager.py +++ b/a2rchi/utils/data_manager.py @@ -1,4 +1,4 @@ -from A2rchi.utils.scraper import Scraper +from a2rchi.utils.scraper import Scraper from chromadb.config import Settings from langchain.document_loaders import TextLoader @@ -19,7 +19,7 @@ class DataManager(): def __init__(self): - from A2rchi.utils.config_loader import Config_Loader + from a2rchi.utils.config_loader import Config_Loader self.config = Config_Loader().config["utils"] self.global_config = Config_Loader().config["global"] self.data_path = self.global_config["DATA_PATH"] diff --git a/A2rchi/utils/embedding_utils.py b/a2rchi/utils/embedding_utils.py similarity index 100% rename from A2rchi/utils/embedding_utils.py rename to a2rchi/utils/embedding_utils.py diff --git a/A2rchi/utils/env.py b/a2rchi/utils/env.py similarity index 100% rename from A2rchi/utils/env.py rename to a2rchi/utils/env.py diff --git a/A2rchi/utils/imap.py 
b/a2rchi/utils/imap.py similarity index 98% rename from A2rchi/utils/imap.py rename to a2rchi/utils/imap.py index c4c1c930..0fa39b4d 100644 --- a/A2rchi/utils/imap.py +++ b/a2rchi/utils/imap.py @@ -1,7 +1,7 @@ import os import getpass, imaplib, email -from A2rchi.utils.config_loader import Config_Loader +from a2rchi.utils.config_loader import Config_Loader config = Config_Loader().config["utils"]["mailbox"] def get_charsets(msg): diff --git a/A2rchi/utils/mailbox.py b/a2rchi/utils/mailbox.py similarity index 98% rename from A2rchi/utils/mailbox.py rename to a2rchi/utils/mailbox.py index 6462a485..dc1949f4 100644 --- a/A2rchi/utils/mailbox.py +++ b/a2rchi/utils/mailbox.py @@ -1,6 +1,6 @@ #!/bin/python -from A2rchi.utils.config_loader import Config_Loader -from A2rchi.utils.env import read_secret +from a2rchi.utils.config_loader import Config_Loader +from a2rchi.utils.env import read_secret import email import imaplib diff --git a/A2rchi/utils/scraper.py b/a2rchi/utils/scraper.py similarity index 50% rename from A2rchi/utils/scraper.py rename to a2rchi/utils/scraper.py index 9a93caed..a608f3c3 100644 --- a/A2rchi/utils/scraper.py +++ b/a2rchi/utils/scraper.py @@ -1,3 +1,5 @@ +from piazza_api import Piazza + import hashlib import os import re @@ -11,11 +13,12 @@ class Scraper(): - def __init__(self): + def __init__(self, piazza_email=None, piazza_password=None): # fetch configs - from A2rchi.utils.config_loader import Config_Loader + from a2rchi.utils.config_loader import Config_Loader self.config = Config_Loader().config["utils"]["scraper"] self.global_config = Config_Loader().config["global"] + self.piazza_config = Config_Loader().config["utils"].get("piazza", None) self.data_path = self.global_config["DATA_PATH"] # create data path if it doesn't exist @@ -28,6 +31,35 @@ def __init__(self): self.input_lists = Config_Loader().config["chains"]["input_lists"] print(f"input lists: {self.input_lists}") + # # log in to piazza + # if self.piazza_config is not None: + # # 
create sub-directory for piazza if it doesn't exist + # self.piazza_dir = os.path.join(self.data_path, "piazza") + # os.makedirs(self.piazza_dir, exist_ok=True) + + # self.piazza = Piazza() + # self.piazza.user_login(email=piazza_email, password=piazza_password) + # self.piazza_net = self.piazza.network(self.piazza_config["network_id"]) + + + def piazza_scrape(self, verbose=False): + # clear piazza data if specified + if self.config["reset_data"] : + for file in os.listdir(self.piazza_dir): + os.remove(os.path.join(self.piazza_dir, file)) + + # iterate over resolved messages and structure them as: + # [("User", Q), ("User", A), ("Expert", A), ("User", F), ("Expert", F), etc.] + unresolved_posts = Scraper.scrape_piazza( + upload_dir=self.piazza_dir, + sources_path=os.path.join(self.data_path, 'sources.yml'), + ) + + if verbose: + print("Piazza scraping was completed successfully") + + return unresolved_posts + def hard_scrape(self, verbose=False): """ @@ -49,7 +81,7 @@ def hard_scrape(self, verbose=False): ) if verbose: - print("Scraping was completed successfully") + print("Web scraping was completed successfully") def collect_urls_from_lists(self): @@ -102,3 +134,55 @@ def scrape_urls(urls, upload_dir, sources_path, verify_urls, enable_warnings): # store list of files with urls to file with open(sources_path, 'w') as file: yaml.dump(sources, file) + + + @staticmethod + def scrape_piazza(upload_dir, sources_path): + print(f" SOURCE: {sources_path}") + try: + # load existing sources or initialize as empty dictionary + with open(sources_path, 'r') as file: + sources = yaml.safe_load(file) or {} + except FileNotFoundError: + sources = {} + + # get generator for all posts + unresolved_posts = [] + posts = self.piazza_net.iter_all_posts(sleep=1.5) + for post in posts: + # add post to unresolved posts if it has no answer or an unresolved followup + if post.get("no_answer", False) or post.get("no_answer_followup", False): + unresolved_posts.append(int(post["nr"])) + 
continue + + # otherwise + + + for url in urls: + # disable warnings if not specified + if not enable_warnings: + import urllib3 + urllib3.disable_warnings() + + # request web page + resp = requests.get(url, verify=verify_urls) + + # write the html output to a file + identifier = hashlib.md5() + identifier.update(url.encode('utf-8')) + file_name = str(int(identifier.hexdigest(), 16))[0:12] + + if (url.split('.')[-1] == 'pdf'): + print(f" Store: {upload_dir}/{file_name}.pdf : {url}") + with open(f"{upload_dir}/{file_name}.pdf", 'wb') as file: + file.write(resp.content) + else: + print(f" Store: {upload_dir}/{file_name}.html : {url}") + with open(f"{upload_dir}/{file_name}.html", 'w') as file: + file.write(resp.text) + + sources[file_name] = url + + # store list of files with urls to file + with open(sources_path, 'w') as file: + yaml.dump(sources, file) diff --git a/A2rchi/utils/sender.py b/a2rchi/utils/sender.py similarity index 97% rename from A2rchi/utils/sender.py rename to a2rchi/utils/sender.py index 0e4d6e1c..15339924 100644 --- a/A2rchi/utils/sender.py +++ b/a2rchi/utils/sender.py @@ -1,4 +1,4 @@ -from A2rchi.utils.env import read_secret +from a2rchi.utils.env import read_secret from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText diff --git a/a2rchi/utils/sql.py b/a2rchi/utils/sql.py new file mode 100644 index 00000000..95d0bf13 --- /dev/null +++ b/a2rchi/utils/sql.py @@ -0,0 +1,41 @@ +"""SQL queries used by A2rchi""" +SQL_INSERT_CONVO = """ +INSERT INTO conversations ( + conversation_id, sender, content, ts +) +VALUES %s +RETURNING message_id; +""" + +SQL_INSERT_FEEDBACK = """ +INSERT INTO feedback ( + mid, feedback_ts, feedback, feedback_msg, incorrect, unhelpful, inappropriate +) +VALUES (%s, %s, %s, %s, %s, %s, %s); +""" + +SQL_QUERY_CONVO = """ +SELECT sender, content +FROM conversations +WHERE conversation_id = %s +ORDER BY message_id ASC; +""" + +SQL_INSERT_TIMING = """ +INSERT INTO timing ( + mid, + client_sent_msg_ts, 
+ server_received_msg_ts, + lock_acquisition_ts, + vectorstore_update_ts, + query_convo_history_ts, + chain_finished_ts, + similarity_search_ts, + a2rchi_message_ts, + insert_convo_ts, + finish_call_ts, + server_response_msg_ts, + msg_duration +) +VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s); +""" diff --git a/config/dev-config.yaml b/config/dev-config.yaml index 99d47a3c..a5cc4f2b 100644 --- a/config/dev-config.yaml +++ b/config/dev-config.yaml @@ -14,14 +14,14 @@ interfaces: EXTERNAL_PORT: 7682 HOST: "0.0.0.0" # either "0.0.0.0" (for public) or "127.0.0.1" (for internal) HOSTNAME: "submit06.mit.edu" # careful, this is used for the chat service - template_folder: "/root/A2rchi/A2rchi/interfaces/chat_app/templates" - static_folder: "/root/A2rchi/A2rchi/interfaces/chat_app/static" + template_folder: "/root/A2rchi/a2rchi/interfaces/chat_app/templates" + static_folder: "/root/A2rchi/a2rchi/interfaces/chat_app/static" num_responses_until_feedback: 3 #the number of responses given by A2rchi until she asks for feedback. include_copy_button: False uploader_app: PORT: 5001 HOST: "0.0.0.0" # either "0.0.0.0" (for public) or "127.0.0.1" (for internal) - template_folder: "/root/A2rchi/A2rchi/interfaces/uploader_app/templates" + template_folder: "/root/A2rchi/a2rchi/interfaces/uploader_app/templates" chains: input_lists: @@ -88,13 +88,13 @@ utils: port: 5432 user: a2rchi database: a2rchi-db - host: dev-postgres-dev-1 + host: dev-postgres-1 data_manager: CHUNK_SIZE: 1000 CHUNK_OVERLAP: 0 use_HTTP_chromadb_client: True # recommended: True (use http client for the chromadb vectorstore?) 
# use_HTTP_chromadb_client: False - chromadb_host: chromadb-dev + chromadb_host: chromadb chromadb_port: 8000 collection_name: "dev_collection" reset_collection: True # reset the entire collection each time it is accessed by a new data manager instance diff --git a/config/prod-65830-config.yaml b/config/prod-65830-config.yaml new file mode 100644 index 00000000..126bfd22 --- /dev/null +++ b/config/prod-65830-config.yaml @@ -0,0 +1,104 @@ +global: + TRAINED_ON: "6.5830" #used to create name of the specific version of a2rchi we're using + DATA_PATH: "/root/data/" + ACCOUNTS_PATH: "/root/.accounts/" + LOCAL_VSTORE_PATH: "/root/data/vstore/" + ACCEPTED_FILES: + -".txt" + -".html" + -".pdf" + +chains: + input_lists: + - empty.list + - prod-65830-docs.list + base: + # roles that A2rchi knows about + ROLES: + - User + - A2rchi + - Expert + #logging within base chain + logging: + #name of .log logfile to be saved in data folder. + input_output_filename: chain_input_output.log + prompts: + # prompt that serves to condense a history and a question into a single question + CONDENSING_PROMPT: config/prompts/condense.prompt + # main prompt which takes in a single question and a context. + MAIN_PROMPT: config/prompts/65830.prompt + chain: + # pick one of the models listed in the model class map below + MODEL_NAME: OpenAILLM # LlamaLLM + # map of all the class models and their keyword arguments + MODEL_CLASS_MAP: + OpenAILLM: + class: OpenAILLM + kwargs: + model_name: gpt-3.5-turbo + temperature: 1 + DumbLLM: + class: DumbLLM + kwargs: + filler: null + LlamaLLM: + class: LlamaLLM + kwargs: + base_model: "meta-llama/Llama-2-7b-chat-hf" #the location of the model (ex. meta-llama/Llama-2-70b) + peft_model: null #the location of the finetuning of the model. 
Can be none + enable_salesforce_content_safety: True # Enable safety check with Salesforce safety flan t5 + quantization: True #enables 8-bit quantization + max_new_tokens: 4096 #The maximum numbers of tokens to generate + seed: null #seed value for reproducibility + do_sample: True #Whether or not to use sampling ; use greedy decoding otherwise. + min_length: null #The minimum length of the sequence to be generated, input prompt + min_new_tokens + use_cache: True #[optional] Whether or not the model should use the past last key/values attentions Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding. + top_p: .9 # [optional] If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. + temperature: .6 # [optional] The value used to modulate the next token probabilities. + top_k: 50 # [optional] The number of highest probability vocabulary tokens to keep for top-k-filtering. + repetition_penalty: 1.0 #The parameter for repetition penalty. 1.0 means no penalty. + length_penalty: 1 #[optional] Exponential penalty to the length that is used with beam-based generation. + max_padding_length: null # the max padding length to be used with tokenizer padding the prompts. + chain_update_time: 10 # the amount of time (in seconds) which passes between when the chain updates to the newest version of the vectorstore + +utils: + cleo: + cleo_update_time: 10 + mailbox: + IMAP4_PORT: 143 + mailbox_update_time: 10 + data_manager: + CHUNK_SIZE: 1000 + CHUNK_OVERLAP: 0 + use_HTTP_chromadb_client: True # recommended: True (use http client for the chromadb vectorstore?) 
+ # use_HTTP_chromadb_client: False + vectordb_update_time: 10 + chromadb_host: chromadb + chromadb_port: 8000 + collection_name: "65830_collection" + reset_collection: True # reset the entire collection each time it is accessed by a new data manager instance + embeddings: + # choose one embedding from list below + EMBEDDING_NAME: OpenAIEmbeddings + # list of possible embeddings to use in vectorstore + EMBEDDING_CLASS_MAP: + OpenAIEmbeddings: + class: OpenAIEmbeddings + kwargs: + model: text-embedding-ada-002 + similarity_score_reference: 0.4 + HuggingFaceEmbeddings: + class: HuggingFaceEmbeddings + kwargs: + model_name: "sentence-transformers/all-mpnet-base-v2" + model_kwargs: + device: 'cpu' + encode_kwargs: + normalize_embeddings: True + similarity_score_reference: 0.9 + scraper: + reset_data: True # delete websites and sources.yml in data folder + verify_urls: False # should be true when possible + enable_warnings: False # keeps output clean if verify == False + piazza: + network_id: llmf39nuxcs5us diff --git a/config/prod-65830-docs.list b/config/prod-65830-docs.list new file mode 100644 index 00000000..0de0cc89 --- /dev/null +++ b/config/prod-65830-docs.list @@ -0,0 +1,59 @@ +https://dsg.csail.mit.edu/6.5830/index.php +https://db.csail.mit.edu/madden/ +https://people.csail.mit.edu/kraska/ +https://dsg.csail.mit.edu/6.5830/syllabus.php +https://dsg.csail.mit.edu/6.5830/faq.php +https://dsg.csail.mit.edu/6.5830/lectures/lec1-notes.pdf +https://dsg.csail.mit.edu/6.5830/lectures/lec2-notes.pdf +https://dsg.csail.mit.edu/6.5830/lectures/lec3-notes.pdf +https://dsg.csail.mit.edu/6.5830/lectures/lec4_2023.pdf +https://dsg.csail.mit.edu/6.5830/lectures/lec5_2023.pdf +https://dsg.csail.mit.edu/6.5830/lectures/lec6_2023.pdf +https://dsg.csail.mit.edu/6.5830/lectures/lec7-2023.pdf +https://dsg.csail.mit.edu/6.5830/lectures/lec8-opt.pdf +https://dsg.csail.mit.edu/6.5830/lectures/lec9-cstore.pdf +https://dsg.csail.mit.edu/6.5830/lectures/lec10_2023.pdf 
+https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/README.md +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/lab1.md +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/lab2.md +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/lab3.md +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/main.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/agg_op.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/agg_op_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/agg_state.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/buffer_pool.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/buffer_pool_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/catalog.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/catalog.txt +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/deadlock_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/delete_op.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/delete_op_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/easy_parser_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/exprs.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/filter_op.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/filter_op_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/heap_file.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/heap_file_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/heap_page.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/heap_page_test.go 
+https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/insert_op.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/insert_op_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/join_op.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/join_op_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/lab1_query_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/lab2_extra_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/limit_op.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/limit_op_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/locking_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/order_by_op.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/order_by_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/parser.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/project_op.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/project_op_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/simple_query_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/transaction.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/transaction_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/tuple.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/tuple_test.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/types.go +https://raw.githubusercontent.com/MIT-DB-Class/go-db-hw-2023/main/godb/value_op.go diff --git a/config/prod-801-config.yaml b/config/prod-801-config.yaml index 66e251b9..7cc7395e 100644 --- a/config/prod-801-config.yaml +++ b/config/prod-801-config.yaml 
@@ -14,14 +14,14 @@ interfaces: EXTERNAL_PORT: 7683 HOST: "0.0.0.0" # either "0.0.0.0" (for public) or "127.0.0.1" (for internal) HOSTNAME: "t3desk019.mit.edu" # careful, this is used for the chat service - template_folder: "/root/A2rchi/A2rchi/interfaces/chat_app/templates" - static_folder: "/root/A2rchi/A2rchi/interfaces/chat_app/static" + template_folder: "/root/A2rchi/a2rchi/interfaces/chat_app/templates" + static_folder: "/root/A2rchi/a2rchi/interfaces/chat_app/static" num_responses_until_feedback: 3 #the number of responses given by A2rchi until she asks for feedback. include_copy_button: False uploader_app: PORT: 5001 HOST: "0.0.0.0" # either "0.0.0.0" (for public) or "127.0.0.1" (for internal) - template_folder: "/root/A2rchi/A2rchi/interfaces/uploader_app/templates" + template_folder: "/root/A2rchi/a2rchi/interfaces/uploader_app/templates" chains: input_lists: @@ -86,13 +86,13 @@ utils: port: 5432 user: a2rchi database: a2rchi-db - host: prod-801-postgres-prod-801-1 + host: prod-801-postgres-1 data_manager: CHUNK_SIZE: 1000 CHUNK_OVERLAP: 0 use_HTTP_chromadb_client: True # recommended: True (use http client for the chromadb vectorstore?) # use_HTTP_chromadb_client: False - chromadb_host: chromadb-prod-801 + chromadb_host: chromadb chromadb_port: 8000 collection_name: "prod_801_collection" #unique in case vector stores are ever combined. 
reset_collection: True # reset the entire collection each time it is accessed by a new data manager instance diff --git a/config/prod-config.yaml b/config/prod-config.yaml index 0a954305..f723418a 100644 --- a/config/prod-config.yaml +++ b/config/prod-config.yaml @@ -14,14 +14,14 @@ interfaces: EXTERNAL_PORT: 7681 HOST: "0.0.0.0" # either "0.0.0.0" (for public) or "127.0.0.1" (for internal) HOSTNAME: "t3desk019.mit.edu" # careful, this is used for the chat service - template_folder: "/root/A2rchi/A2rchi/interfaces/chat_app/templates" - static_folder: "/root/A2rchi/A2rchi/interfaces/chat_app/static" + template_folder: "/root/A2rchi/a2rchi/interfaces/chat_app/templates" + static_folder: "/root/A2rchi/a2rchi/interfaces/chat_app/static" num_responses_until_feedback: 3 #the number of responses given by A2rchi until she asks for feedback. include_copy_button: False uploader_app: PORT: 5001 HOST: "0.0.0.0" # either "0.0.0.0" (for public) or "127.0.0.1" (for internal) - template_folder: "/root/A2rchi/A2rchi/interfaces/uploader_app/templates" + template_folder: "/root/A2rchi/a2rchi/interfaces/uploader_app/templates" chains: input_lists: @@ -86,13 +86,13 @@ utils: port: 5432 user: a2rchi database: a2rchi-db - host: prod-postgres-prod-1 + host: prod-postgres-1 data_manager: CHUNK_SIZE: 1000 CHUNK_OVERLAP: 0 use_HTTP_chromadb_client: True # recommended: True (use http client for the chromadb vectorstore?) 
# use_HTTP_chromadb_client: False - chromadb_host: chromadb-prod + chromadb_host: chromadb chromadb_port: 8000 collection_name: "prod_collection" reset_collection: True # reset the entire collection each time it is accessed by a new data manager instance diff --git a/config/prod-meta-config.yaml b/config/prod-meta-config.yaml index 248ae96a..55a25174 100644 --- a/config/prod-meta-config.yaml +++ b/config/prod-meta-config.yaml @@ -14,14 +14,14 @@ interfaces: EXTERNAL_PORT: 7685 HOST: "0.0.0.0" # either "0.0.0.0" (for public) or "127.0.0.1" (for internal) HOSTNAME: "submit06.mit.edu" # careful, this is used for the chat service - template_folder: "/root/A2rchi/A2rchi/interfaces/chat_app/templates" - static_folder: "/root/A2rchi/A2rchi/interfaces/chat_app/static" + template_folder: "/root/A2rchi/a2rchi/interfaces/chat_app/templates" + static_folder: "/root/A2rchi/a2rchi/interfaces/chat_app/static" num_responses_until_feedback: 3 #the number of responses given by A2rchi until she asks for feedback. include_copy_button: False uploader_app: PORT: 5001 HOST: "0.0.0.0" # either "0.0.0.0" (for public) or "127.0.0.1" (for internal) - template_folder: "/root/A2rchi/A2rchi/interfaces/uploader_app/templates" + template_folder: "/root/A2rchi/a2rchi/interfaces/uploader_app/templates" chains: input_lists: @@ -86,7 +86,7 @@ utils: CHUNK_OVERLAP: 0 use_HTTP_chromadb_client: False # recommended: True (use http client for the chromadb vectorstore?) # use_HTTP_chromadb_client: False - chromadb_host: chromadb-prod-meta + chromadb_host: chromadb chromadb_port: 8000 collection_name: "prod_meta_collection" #unique in case vector stores are ever combined. 
reset_collection: True # reset the entire collection each time it is accessed by a new data manager instance diff --git a/config/prod-root-config.yaml b/config/prod-root-config.yaml index c46545e4..2e3d149d 100644 --- a/config/prod-root-config.yaml +++ b/config/prod-root-config.yaml @@ -14,14 +14,14 @@ interfaces: EXTERNAL_PORT: 7684 HOST: "0.0.0.0" # either "0.0.0.0" (for public) or "127.0.0.1" (for internal) HOSTNAME: "submit06.mit.edu" # careful, this is used for the chat service - template_folder: "/root/A2rchi/A2rchi/interfaces/chat_app/templates" - static_folder: "/root/A2rchi/A2rchi/interfaces/chat_app/static" + template_folder: "/root/A2rchi/a2rchi/interfaces/chat_app/templates" + static_folder: "/root/A2rchi/a2rchi/interfaces/chat_app/static" num_responses_until_feedback: 3 #the number of responses given by A2rchi until she asks for feedback. include_copy_button: False uploader_app: PORT: 5001 HOST: "0.0.0.0" # either "0.0.0.0" (for public) or "127.0.0.1" (for internal) - template_folder: "/root/A2rchi/A2rchi/interfaces/uploader_app/templates" + template_folder: "/root/A2rchi/a2rchi/interfaces/uploader_app/templates" chains: input_lists: @@ -88,13 +88,13 @@ utils: port: 5432 user: a2rchi database: a2rchi-db - host: prod-root-postgres-prod-root-1 + host: prod-root-postgres-1 data_manager: CHUNK_SIZE: 1000 CHUNK_OVERLAP: 0 use_HTTP_chromadb_client: True # recommended: True (use http client for the chromadb vectorstore?) # use_HTTP_chromadb_client: False - chromadb_host: chromadb-prod-root + chromadb_host: chromadb chromadb_port: 8000 collection_name: "prod_root_collection" #unique in case vector stores are ever combined. 
reset_collection: True # reset the entire collection each time it is accessed by a new data manager instance diff --git a/config/prompts/65830.prompt b/config/prompts/65830.prompt new file mode 100644 index 00000000..41452a3d --- /dev/null +++ b/config/prompts/65830.prompt @@ -0,0 +1,20 @@ +# Prompt used to qurery LLM with appropriate context and question. +# This prompt is specific to subMIT and likely will not perform well for other applications, where it is recommeneded to write your own prompt and change it in the config +# +# All final promptsd must have the following tags in them, which will be filled with the appropriate information: +# {question} +# {context} +# +You are a conversational chatbot named A2rchi who helps students in MIT's Database Systems class 6.5830 with their questions about coding assignments, problem sets, and general database principles. +Using your Golang and databases knowledge, answer the question at the end. Unless otherwise indicated, assume the users are students learning about databases for the first time, but understand basic Computer Science concepts. + +You will be provided context to help you answer the students' questions. The context may include previous chat history and/or snippets of code. + +The students are implementing a database in Golang called GoDB, so they may ask clarifying questions about how to implement their database. You may reference the source code to suggest implementation approaches, or provide helpful hints, but DO NOT try to implement entire functions on behalf of the students. + +If you don't know the answer to the user's question, say "I don't know". If you need to ask a follow up question, please do. 
+ +Context: {context} + +Question: {question} +Helpful Answer: \ No newline at end of file diff --git a/deploy/create_secret.sh b/deploy/create_secret.sh new file mode 100755 index 00000000..6663c7b3 --- /dev/null +++ b/deploy/create_secret.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +secret_file=$1 +secret=$2 + +touch WORKSPACE/deploy/ENV/secrets/"${secret_file}" +echo "${secret}" >> WORKSPACE/deploy/ENV/secrets/"${secret_file}" +chmod 400 WORKSPACE/deploy/ENV/secrets/"${secret_file}" diff --git a/deploy/dev/dev-compose.yaml b/deploy/dev/dev-compose.yaml index 83236e45..050ede81 100644 --- a/deploy/dev/dev-compose.yaml +++ b/deploy/dev/dev-compose.yaml @@ -1,5 +1,5 @@ services: - cleo-dev: + cleo: image: cleo-dev:${TAG} build: context: ../.. @@ -7,9 +7,9 @@ services: args: TAG: ${TAG} depends_on: - chromadb-dev: + chromadb: condition: service_healthy - postgres-dev: + postgres: condition: service_healthy environment: RUNTIME_ENV: dev @@ -45,7 +45,7 @@ services: max-size: 10m restart: always - chat-dev: + chat: image: chat-dev:${TAG} build: context: ../.. @@ -53,9 +53,9 @@ services: args: TAG: ${TAG} depends_on: - chromadb-dev: + chromadb: condition: service_healthy - postgres-dev: + postgres: condition: service_healthy environment: RUNTIME_ENV: dev @@ -75,7 +75,7 @@ services: - 7682:7861 # host:container restart: always - mailbox-dev: + mailbox: image: mailbox-dev:${TAG} build: context: ../.. @@ -83,9 +83,9 @@ services: args: TAG: ${TAG} depends_on: - chromadb-dev: + chromadb: condition: service_healthy - postgres-dev: + postgres: condition: service_healthy environment: RUNTIME_ENV: dev @@ -121,7 +121,7 @@ services: max-size: 10m restart: always - uploader-dev: + uploader: image: uploader-dev:${TAG} build: context: ../.. 
@@ -129,9 +129,9 @@ services: args: TAG: ${TAG} depends_on: - chromadb-dev: + chromadb: condition: service_healthy - postgres-dev: + postgres: condition: service_healthy environment: RUNTIME_ENV: dev @@ -153,7 +153,7 @@ services: max-size: 10m restart: always - chromadb-dev: + chromadb: image: chromadb-dev:${TAG} build: context: ../.. @@ -178,7 +178,7 @@ services: start_period: 10s start_interval: 5s - postgres-dev: + postgres: image: postgres:16 environment: RUNTIME_ENV: dev @@ -188,7 +188,7 @@ services: secrets: - pg_password volumes: - - ./dev-init.sql:/docker-entrypoint-initdb.d/init.sql + - ../init.sql:/docker-entrypoint-initdb.d/init.sql - a2rchi-dev-pg-data:/var/lib/postgresql/data logging: options: @@ -200,11 +200,31 @@ services: timeout: 5s retries: 5 + grafana: + image: grafana-dev:${TAG} + build: + context: ../.. + dockerfile: deploy/dockerfiles/Dockerfile-grafana + depends_on: + postgres: + condition: service_healthy + ports: + - 3001:3000 # host:container + volumes: + - a2rchi-dev-grafana-data:/var/lib/grafana + - ../grafana/a2rchi-default-dashboard.json:/var/lib/grafana/dashboards/a2rchi-default-dashboard.json + logging: + options: + max-size: 10m + restart: always + volumes: a2rchi-dev-data: external: true a2rchi-dev-pg-data: external: true + a2rchi-dev-grafana-data: + external: true secrets: imap_user: diff --git a/deploy/dev/dev-init.sql b/deploy/dev/dev-init.sql deleted file mode 100644 index 90d49c61..00000000 --- a/deploy/dev/dev-init.sql +++ /dev/null @@ -1,19 +0,0 @@ -CREATE TABLE IF NOT EXISTS conversations ( - conversation_id INTEGER NOT NULL, - message_id SERIAL, - sender TEXT NOT NULL, - content TEXT NOT NULL, - ts TIMESTAMP NOT NULL, - PRIMARY KEY (message_id) -); -CREATE TABLE IF NOT EXISTS feedback ( - mid INTEGER NOT NULL, - feedback_ts TIMESTAMP NOT NULL, - feedback TEXT NOT NULL, - feedback_msg TEXT, - incorrect BOOLEAN, - unhelpful BOOLEAN, - inappropriate BOOLEAN, - PRIMARY KEY (mid, feedback_ts), - FOREIGN KEY (mid) REFERENCES 
conversations(message_id) -); \ No newline at end of file diff --git a/deploy/dev/dev-install.sh b/deploy/dev/dev-install.sh index ba11b725..bbdb2df4 100755 --- a/deploy/dev/dev-install.sh +++ b/deploy/dev/dev-install.sh @@ -12,6 +12,18 @@ if [[ $exists != 'a2rchi-dev-pg-data' ]]; then docker volume create --name a2rchi-dev-pg-data fi +# create volume if it doesn't already exist for grafana data +exists=`docker volume ls | awk '{print $2}' | grep a2rchi-dev-grafana-data` +if [[ $exists != 'a2rchi-dev-grafana-data' ]]; then + docker volume create --name a2rchi-dev-grafana-data +fi + +# fill-in variables in grafana files +export grafanapass=`cat A2rchi-dev/deploy/dev/secrets/grafana_password.txt` +sed -i 's/GRAFANA_PASSWORD/'"${grafanapass}"'/g' A2rchi-dev/deploy/grafana/datasources.yaml +sed -i 's/GRAFANA_PASSWORD/'"${grafanapass}"'/g' A2rchi-dev/deploy/init.sql +unset grafanapass + # build base image; try to reuse previously built image cd A2rchi-dev/deploy/dev/ docker build -f ../dockerfiles/Dockerfile-base -t a2rchi-base:BASE_TAG ../.. 
@@ -21,10 +33,4 @@ echo "Starting docker compose" docker compose -f dev-compose.yaml up -d --build --force-recreate --always-recreate-deps # # secrets files are created by CI pipeline and destroyed here -# rm secrets/cleo_*.txt -# rm secrets/imap_*.txt -# rm secrets/sender_*.txt -# rm secrets/flask_uploader_app_secret_key.txt -# rm secrets/uploader_salt.txt -# rm secrets/openai_api_key.txt -# rm secrets/hf_token.txt +# rm secrets/*.txt diff --git a/deploy/dev/dev-stop.sh b/deploy/dev/dev-stop.sh index c097eb73..1ac9586c 100755 --- a/deploy/dev/dev-stop.sh +++ b/deploy/dev/dev-stop.sh @@ -1,5 +1,9 @@ #!/bin/bash -echo "Stop running docker compose" -cd A2rchi-dev/deploy/dev/ -docker compose -f dev-compose.yaml down +if [ -z "$(ls -A A2rchi-dev/deploy/dev/)" ]; then + echo "Deployment directory is empty; skipping docker compose down" +else + echo "Stop running docker compose" + cd A2rchi-dev/deploy/dev/ + docker compose -f dev-compose.yaml down +fi diff --git a/deploy/dockerfiles/Dockerfile-base b/deploy/dockerfiles/Dockerfile-base index 9952d0e5..62925b8e 100644 --- a/deploy/dockerfiles/Dockerfile-base +++ b/deploy/dockerfiles/Dockerfile-base @@ -7,5 +7,5 @@ COPY pyproject.toml pyproject.toml COPY README.md README.md COPY LICENSE LICENSE COPY config config -COPY A2rchi A2rchi +COPY a2rchi a2rchi RUN pip install --upgrade pip && pip install . 
diff --git a/deploy/dockerfiles/Dockerfile-chat b/deploy/dockerfiles/Dockerfile-chat index 0902e821..077738c5 100644 --- a/deploy/dockerfiles/Dockerfile-chat +++ b/deploy/dockerfiles/Dockerfile-chat @@ -6,4 +6,4 @@ EXPOSE 7861 # ensure this directory is present for prod-801 deployment RUN if [ "$BUILD_ENV" = "prod-801" ] ; then mkdir /root/data/801-content ; fi -CMD ["python", "-u", "A2rchi/bin/service_chat.py"] +CMD ["python", "-u", "a2rchi/bin/service_chat.py"] diff --git a/deploy/dockerfiles/Dockerfile-chroma b/deploy/dockerfiles/Dockerfile-chroma index 16bf8dc3..0d06bf59 100644 --- a/deploy/dockerfiles/Dockerfile-chroma +++ b/deploy/dockerfiles/Dockerfile-chroma @@ -1,5 +1,5 @@ # syntax=docker/dockerfile:1 FROM ghcr.io/chroma-core/chroma:0.4.12 -RUN apt-get update -y && apt-get install -o Acquire::ForceIPv4=true -y curl +RUN apt-get update -y && apt-get install -y curl CMD ["uvicorn", "chromadb.app:app", "--reload", "--workers", "1", "--host", "0.0.0.0", "--port", "8000", "--log-config", "chromadb/log_config.yml"] diff --git a/deploy/dockerfiles/Dockerfile-cleo b/deploy/dockerfiles/Dockerfile-cleo index 27ab9e68..de2e612a 100644 --- a/deploy/dockerfiles/Dockerfile-cleo +++ b/deploy/dockerfiles/Dockerfile-cleo @@ -2,4 +2,4 @@ ARG TAG=from-env FROM a2rchi-base:${TAG} -CMD ["python", "-u", "A2rchi/bin/service_cleo.py"] +CMD ["python", "-u", "a2rchi/bin/service_cleo.py"] diff --git a/deploy/dockerfiles/Dockerfile-grafana b/deploy/dockerfiles/Dockerfile-grafana new file mode 100644 index 00000000..1373aa4c --- /dev/null +++ b/deploy/dockerfiles/Dockerfile-grafana @@ -0,0 +1,11 @@ +# syntax=docker/dockerfile:1 + +# create grafana directories +FROM grafana/grafana-enterprise:10.2.0 +RUN mkdir -p /etc/grafana/provisioning/datasources +RUN mkdir -p /etc/grafana/provisioning/dashboards +RUN mkdir -p /var/lib/grafana/dashboards + +# copy files into respective grafana directories +COPY deploy/grafana/datasources.yaml /etc/grafana/provisioning/datasources/ +COPY 
deploy/grafana/dashboards.yaml /etc/grafana/provisioning/dashboards/ diff --git a/deploy/dockerfiles/Dockerfile-mailbox b/deploy/dockerfiles/Dockerfile-mailbox index 78faa4ea..bbcb8f1b 100644 --- a/deploy/dockerfiles/Dockerfile-mailbox +++ b/deploy/dockerfiles/Dockerfile-mailbox @@ -2,4 +2,4 @@ ARG TAG=from-env FROM a2rchi-base:${TAG} -CMD ["python", "-u", "A2rchi/bin/service_mailbox.py"] +CMD ["python", "-u", "a2rchi/bin/service_mailbox.py"] diff --git a/deploy/dockerfiles/Dockerfile-piazza b/deploy/dockerfiles/Dockerfile-piazza new file mode 100644 index 00000000..92e0280f --- /dev/null +++ b/deploy/dockerfiles/Dockerfile-piazza @@ -0,0 +1,5 @@ +# syntax=docker/dockerfile:1 +ARG TAG=from-env +FROM a2rchi-base:${TAG} + +CMD ["python", "-u", "a2rchi/bin/service_piazza.py"] diff --git a/deploy/dockerfiles/Dockerfile-uploader b/deploy/dockerfiles/Dockerfile-uploader index a119e80a..4f1833db 100644 --- a/deploy/dockerfiles/Dockerfile-uploader +++ b/deploy/dockerfiles/Dockerfile-uploader @@ -2,4 +2,4 @@ ARG TAG=from-env FROM a2rchi-base:${TAG} -CMD ["python", "-u", "A2rchi/bin/service_uploader.py"] +CMD ["python", "-u", "a2rchi/bin/service_uploader.py"] diff --git a/deploy/grafana/a2rchi-default-dashboard.json b/deploy/grafana/a2rchi-default-dashboard.json new file mode 100644 index 00000000..15330c24 --- /dev/null +++ b/deploy/grafana/a2rchi-default-dashboard.json @@ -0,0 +1,469 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Charts tracking A2rchi's usage.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "postgres", + "uid": "P44368ADAD746BC27" + }, + "description": "Number of queries handled by A2rchi per-hour.", + "fieldConfig": 
{ + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 40, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "postgres", + "uid": "P44368ADAD746BC27" + }, + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "-- number of messages per 15min window\nWITH msg_timing_info AS (\n SELECT $__timeGroup(server_received_msg_ts, '1h', 0) as t1, count(mid) as num_msgs\n FROM timing\n WHERE $__timeFrom() <= server_received_msg_ts AND server_received_msg_ts <= $__timeTo()\n GROUP BY 1\n ORDER BY $__timeGroup(server_received_msg_ts, '1h', 0)\n), convo_timing_info AS ( -- number of unique conversations per 15 min window\n SELECT $__timeGroup(server_received_msg_ts, '1h', 0) as t2, count(distinct(conversation_id)) as num_convos\n FROM timing JOIN conversations ON timing.mid = conversations.message_id\n WHERE $__timeFrom() <= server_received_msg_ts AND server_received_msg_ts <= $__timeTo()\n GROUP BY 1\n ORDER BY 
$__timeGroup(server_received_msg_ts, '1h', 0)\n), intervals AS (\n SELECT $__timeGroupAlias(t, '1h', 0) FROM generate_series($__timeFrom(), $__timeTo(), '1 hour'::interval) AS s(t)\n), msgs_join_convos AS (\n SELECT * FROM msg_timing_info JOIN convo_timing_info ON t1 = t2\n)\nSELECT time, num_msgs, num_convos FROM intervals LEFT OUTER JOIN msgs_join_convos ON t1 = intervals.time;\n", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [ + { + "name": "*", + "type": "functionParameter" + } + ], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + }, + "table": "timing" + } + ], + "title": "Number of Messages and Distinct Conversations", + "type": "timeseries" + }, + { + "datasource": { + "type": "postgres", + "uid": "P44368ADAD746BC27" + }, + "description": "Distribution of `msg_duration` column, which measures the total time from when the server receives the message to when it returns a response to the user.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1 + }, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "bucketOffset": 0, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + } + }, + "targets": [ + { + "datasource": { + "type": "postgres", + "uid": "P44368ADAD746BC27" + }, + "editorMode": "code", + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT server_received_msg_ts as time, extract(seconds from msg_duration) as msg_duration_seconds\nFROM timing\nWHERE 
server_received_msg_ts >= $__timeFrom() AND server_received_msg_ts < $__timeTo()\nORDER BY time", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [ + { + "name": "msg_duration", + "type": "functionParameter" + } + ], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + }, + "table": "timing" + } + ], + "title": "A2rchi Response Time Histogram", + "type": "histogram" + }, + { + "datasource": { + "type": "postgres", + "uid": "P44368ADAD746BC27" + }, + "description": "Bar chart tracking the feedback totals.", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [ + { + "options": { + "dislike": { + "color": "yellow", + "index": 1 + }, + "like": { + "color": "green", + "index": 0 + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "barRadius": 0, + "barWidth": 0.9, + "colorByField": "feedback", + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xField": "feedback", + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + 
"type": "postgres", + "uid": "P44368ADAD746BC27" + }, + "editorMode": "code", + "format": "table", + "hide": false, + "rawQuery": true, + "rawSql": "WITH filtered_feedback AS (\n SELECT mid, feedback, max(feedback_ts)\n FROM feedback\n GROUP BY mid, feedback\n)\nSELECT count(*), feedback, 'baseline' as config FROM filtered_feedback GROUP BY config, feedback;\n", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "title": "Feedback", + "type": "barchart" + }, + { + "datasource": { + "type": "postgres", + "uid": "P44368ADAD746BC27" + }, + "description": "A histogram showing the distribution of the number of messages per conversation.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1 + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "bucketOffset": 0, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + } + }, + "targets": [ + { + "datasource": { + "type": "postgres", + "uid": "P44368ADAD746BC27" + }, + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "SELECT count(message_id)\nFROM conversations\nGROUP BY conversation_id", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "title": "Conversations w/X Messages Histogram", + "type": "histogram" + } + ], + "refresh": 
"", + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "A2rchi Usage", + "uid": "faf20efc-ffe5-48ae-9a26-debe655bf5a8", + "version": 6, + "weekStart": "" +} diff --git a/deploy/grafana/dashboards.yaml b/deploy/grafana/dashboards.yaml new file mode 100644 index 00000000..476ef883 --- /dev/null +++ b/deploy/grafana/dashboards.yaml @@ -0,0 +1,9 @@ +apiVersion: 1 + +providers: + - name: A2rchi + folder: A2rchi + type: file + options: + path: + /var/lib/grafana/dashboards diff --git a/deploy/grafana/datasources.yaml b/deploy/grafana/datasources.yaml new file mode 100644 index 00000000..969ff44f --- /dev/null +++ b/deploy/grafana/datasources.yaml @@ -0,0 +1,18 @@ +apiVersion: 1 + +datasources: + - name: Postgres + type: postgres + url: postgres:5432 # templated by install script + user: grafana + secureJsonData: + password: 'GRAFANA_PASSWORD' + jsonData: + database: a2rchi-db + sslmode: 'disable' # disable/require/verify-ca/verify-full + maxOpenConns: 100 # Grafana v5.4+ + maxIdleConns: 100 # Grafana v5.4+ + maxIdleConnsAuto: true # Grafana v9.5.1+ + connMaxLifetime: 14400 # Grafana v5.4+ + postgresVersion: 1000 # 903=9.3, 904=9.4, 905=9.5, 906=9.6, 1000=10; 1000 refers to >= v10 + timescaledb: false diff --git a/deploy/init.sql b/deploy/init.sql new file mode 100644 index 00000000..a86f2575 --- /dev/null +++ b/deploy/init.sql @@ -0,0 +1,52 @@ +-- create tables +CREATE TABLE IF NOT EXISTS conversations ( + conversation_id INTEGER NOT NULL, + message_id SERIAL, + sender TEXT NOT NULL, + content TEXT NOT NULL, + ts TIMESTAMP NOT NULL, + PRIMARY KEY (message_id) +); +CREATE TABLE IF NOT EXISTS feedback ( + mid INTEGER NOT NULL, + feedback_ts TIMESTAMP NOT NULL, + feedback TEXT NOT NULL, + feedback_msg TEXT, + incorrect BOOLEAN, + unhelpful BOOLEAN, + inappropriate BOOLEAN, + PRIMARY KEY (mid, feedback_ts), + FOREIGN KEY (mid) REFERENCES 
conversations(message_id) +); +CREATE TABLE IF NOT EXISTS timing ( + mid INTEGER NOT NULL, + client_sent_msg_ts TIMESTAMP NOT NULL, + server_received_msg_ts TIMESTAMP NOT NULL, + lock_acquisition_ts TIMESTAMP NOT NULL, + vectorstore_update_ts TIMESTAMP NOT NULL, + query_convo_history_ts TIMESTAMP NOT NULL, + chain_finished_ts TIMESTAMP NOT NULL, + similarity_search_ts TIMESTAMP NOT NULL, + a2rchi_message_ts TIMESTAMP NOT NULL, + insert_convo_ts TIMESTAMP NOT NULL, + finish_call_ts TIMESTAMP NOT NULL, + server_response_msg_ts TIMESTAMP NOT NULL, + msg_duration INTERVAL SECOND NOT NULL, + PRIMARY KEY (mid), + FOREIGN KEY (mid) REFERENCES conversations(message_id) +); + +-- create grafana user if it does not exist +DO +$do$ +BEGIN + IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'grafana') THEN + CREATE USER grafana WITH PASSWORD 'GRAFANA_PASSWORD'; + GRANT USAGE ON SCHEMA public TO grafana; + GRANT SELECT ON public.timing TO grafana; + GRANT SELECT ON public.conversations TO grafana; + GRANT SELECT ON public.feedback TO grafana; + END IF; +END +$do$; + diff --git a/deploy/prod-65830/prod-65830-compose.yaml b/deploy/prod-65830/prod-65830-compose.yaml new file mode 100644 index 00000000..02f8ad72 --- /dev/null +++ b/deploy/prod-65830/prod-65830-compose.yaml @@ -0,0 +1,71 @@ +services: + piazza: + image: piazza:${TAG} + build: + context: ../.. 
+ dockerfile: deploy/dockerfiles/Dockerfile-piazza + args: + TAG: ${TAG} + depends_on: + chromadb: + condition: service_healthy + environment: + RUNTIME_ENV: prod-65830 + OPENAI_API_KEY_FILE: /run/secrets/openai_api_key + HUGGING_FACE_HUB_TOKEN_FILE: /run/secrets/hf_token + PIAZZA_EMAIL_FILE: /run/secrets/piazza_email + PIAZZA_PASSWORD_FILE: /run/secrets/piazza_password + SLACK_WEBHOOK_FILE: /run/secrets/slack_webhook + secrets: + - openai_api_key + - hf_token + - piazza_email + - piazza_password + - slack_webhook + volumes: + - a2rchi-prod-65830-data:/root/data/ + # - /home/ubuntu/piazza-content/:/root/data/piazza-content/ + logging: + options: + max-size: 10m + restart: always + + chromadb: + image: chromadb-prod-65830:${TAG} + build: + context: ../.. + dockerfile: deploy/dockerfiles/Dockerfile-chroma + environment: + RUNTIME_ENV: prod-65830 + ports: + - 8005:8000 # host:container + volumes: + - a2rchi-prod-65830-data:/chroma/chroma/ + logging: + options: + max-size: 10m + restart: always + # healthcheck originates from inside container; so use container port + healthcheck: + test: ["CMD", "curl", "-f", "http://0.0.0.0:8000/api/v1/heartbeat"] + interval: 15s + timeout: 10s + retries: 3 + start_period: 10s + start_interval: 5s + +volumes: + a2rchi-prod-65830-data: + external: true + +secrets: + openai_api_key: + file: secrets/openai_api_key.txt + hf_token: + file: secrets/hf_token.txt + piazza_email: + file: secrets/piazza_email.txt + piazza_password: + file: secrets/piazza_password.txt + slack_webhook: + file: secrets/slack_webhook.txt diff --git a/deploy/prod-65830/prod-65830-install.sh b/deploy/prod-65830/prod-65830-install.sh new file mode 100755 index 00000000..953d843c --- /dev/null +++ b/deploy/prod-65830/prod-65830-install.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# create volume if it doesn't already exist +exists=`docker volume ls | awk '{print $2}' | grep a2rchi-prod-65830-data` +if [[ $exists != 'a2rchi-prod-65830-data' ]]; then + docker volume create --name 
a2rchi-prod-65830-data +fi + +# # create volume if it doesn't already exist for postgres data +# exists=`docker volume ls | awk '{print $2}' | grep a2rchi-prod-65830-pg-data` +# if [[ $exists != 'a2rchi-prod-65830-pg-data' ]]; then +# docker volume create --name a2rchi-prod-65830-pg-data +# fi + +# # create volume if it doesn't already exist for grafana data +# exists=`docker volume ls | awk '{print $2}' | grep a2rchi-prod-65830-grafana-data` +# if [[ $exists != 'a2rchi-prod-65830-grafana-data' ]]; then +# docker volume create --name a2rchi-prod-65830-grafana-data +# fi + +# # fill-in variables in grafana files +# export grafanapass=`cat A2rchi-prod-65830/deploy/prod-65830/secrets/grafana_password.txt` +# sed -i 's/GRAFANA_PASSWORD/'"${grafanapass}"'/g' A2rchi-prod-65830/deploy/grafana/datasources.yaml +# sed -i 's/GRAFANA_PASSWORD/'"${grafanapass}"'/g' A2rchi-prod-65830/deploy/init.sql +# unset grafanapass + +# build base image; try to reuse previously built image +cd A2rchi-prod-65830/deploy/prod-65830/ +docker build -f ../dockerfiles/Dockerfile-base -t a2rchi-base:BASE_TAG ../.. 
+ +# start services +echo "Starting docker compose" +docker compose -f prod-65830-compose.yaml up -d --build --force-recreate --always-recreate-deps + +# # secrets files are created by CI pipeline and destroyed here +# rm secrets/*.txt diff --git a/deploy/prod-65830/prod-65830-stop.sh b/deploy/prod-65830/prod-65830-stop.sh new file mode 100644 index 00000000..67ee7ac2 --- /dev/null +++ b/deploy/prod-65830/prod-65830-stop.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +if [ -z "$(ls -A A2rchi-prod-65830/deploy/prod-65830/)" ]; then + echo "Deployment directory is empty; skipping docker compose down" +else + echo "Stop running docker compose" + cd A2rchi-prod-65830/deploy/prod-65830/ + docker compose -f prod-65830-compose.yaml down +fi diff --git a/deploy/prod-801/prod-801-compose.yaml b/deploy/prod-801/prod-801-compose.yaml index 7b49544c..7ca01ce9 100644 --- a/deploy/prod-801/prod-801-compose.yaml +++ b/deploy/prod-801/prod-801-compose.yaml @@ -1,5 +1,5 @@ services: - chat-prod-801: + chat: image: chat-prod-801:${TAG} build: context: ../.. @@ -8,7 +8,9 @@ services: BUILD_ENV: prod-801 TAG: ${TAG} depends_on: - chromadb-prod-801: + chromadb: + condition: service_healthy + postgres: condition: service_healthy environment: RUNTIME_ENV: prod-801 @@ -28,7 +30,7 @@ services: - 7683:7861 # host:container restart: always - uploader-prod-801: + uploader: image: uploader-prod-801:${TAG} build: context: ../.. @@ -36,7 +38,9 @@ services: args: TAG: ${TAG} depends_on: - chromadb-prod-801: + chromadb: + condition: service_healthy + postgres: condition: service_healthy environment: RUNTIME_ENV: prod-801 @@ -59,7 +63,7 @@ services: max-size: 10m restart: always - chromadb-prod-801: + chromadb: image: chromadb-prod-801:${TAG} build: context: ../.. 
@@ -83,7 +87,7 @@ services: start_period: 10s start_interval: 5s - postgres-prod-801: + postgres: image: postgres:16 environment: RUNTIME_ENV: prod-801 @@ -93,7 +97,7 @@ services: secrets: - pg_password volumes: - - ./prod-801-init.sql:/docker-entrypoint-initdb.d/init.sql + - ../init.sql:/docker-entrypoint-initdb.d/init.sql - a2rchi-prod-801-pg-data:/var/lib/postgresql/data logging: options: @@ -105,11 +109,31 @@ services: timeout: 5s retries: 5 + grafana: + image: grafana-prod-801:${TAG} + build: + context: ../.. + dockerfile: deploy/dockerfiles/Dockerfile-grafana + depends_on: + postgres: + condition: service_healthy + ports: + - 3002:3000 # host:container + volumes: + - a2rchi-prod-801-grafana-data:/var/lib/grafana + - ../grafana/a2rchi-default-dashboard.json:/var/lib/grafana/dashboards/a2rchi-default-dashboard.json + logging: + options: + max-size: 10m + restart: always + volumes: a2rchi-prod-801-data: external: true a2rchi-prod-801-pg-data: external: true + a2rchi-prod-801-grafana-data: + external: true secrets: flask_uploader_app_secret_key: diff --git a/deploy/prod-801/prod-801-init.sql b/deploy/prod-801/prod-801-init.sql deleted file mode 100644 index 90d49c61..00000000 --- a/deploy/prod-801/prod-801-init.sql +++ /dev/null @@ -1,19 +0,0 @@ -CREATE TABLE IF NOT EXISTS conversations ( - conversation_id INTEGER NOT NULL, - message_id SERIAL, - sender TEXT NOT NULL, - content TEXT NOT NULL, - ts TIMESTAMP NOT NULL, - PRIMARY KEY (message_id) -); -CREATE TABLE IF NOT EXISTS feedback ( - mid INTEGER NOT NULL, - feedback_ts TIMESTAMP NOT NULL, - feedback TEXT NOT NULL, - feedback_msg TEXT, - incorrect BOOLEAN, - unhelpful BOOLEAN, - inappropriate BOOLEAN, - PRIMARY KEY (mid, feedback_ts), - FOREIGN KEY (mid) REFERENCES conversations(message_id) -); \ No newline at end of file diff --git a/deploy/prod-801/prod-801-install.sh b/deploy/prod-801/prod-801-install.sh index 867b4607..bb56326d 100644 --- a/deploy/prod-801/prod-801-install.sh +++ 
b/deploy/prod-801/prod-801-install.sh @@ -12,6 +12,18 @@ if [[ $exists != 'a2rchi-prod-801-pg-data' ]]; then docker volume create --name a2rchi-prod-801-pg-data fi +# create volume if it doesn't already exist for grafana data +exists=`docker volume ls | awk '{print $2}' | grep a2rchi-prod-801-grafana-data` +if [[ $exists != 'a2rchi-prod-801-grafana-data' ]]; then + docker volume create --name a2rchi-prod-801-grafana-data +fi + +# fill-in variables in grafana files +export grafanapass=`cat A2rchi-prod-801/deploy/prod-801/secrets/grafana_password.txt` +sed -i 's/GRAFANA_PASSWORD/'"${grafanapass}"'/g' A2rchi-prod-801/deploy/grafana/datasources.yaml +sed -i 's/GRAFANA_PASSWORD/'"${grafanapass}"'/g' A2rchi-prod-801/deploy/init.sql +unset grafanapass + # build base image; try to reuse previously built image cd A2rchi-prod-801/deploy/prod-801/ docker build -f ../dockerfiles/Dockerfile-base -t a2rchi-base:BASE_TAG ../.. @@ -21,10 +33,4 @@ echo "Starting docker compose" docker compose -f prod-801-compose.yaml up -d --build --force-recreate --always-recreate-deps # # secrets files are created by CI pipeline and destroyed here -# rm secrets/cleo_*.txt -# rm secrets/imap_*.txt -# rm secrets/sender_*.txt -# rm secrets/flask_uploader_app_secret_key.txt -# rm secrets/uploader_salt.txt -# rm secrets/openai_api_key.txt -# rm secrets/hf_token.txt +# rm secrets/*.txt diff --git a/deploy/prod-801/prod-801-stop.sh b/deploy/prod-801/prod-801-stop.sh index d367bd5b..be9c4f1d 100644 --- a/deploy/prod-801/prod-801-stop.sh +++ b/deploy/prod-801/prod-801-stop.sh @@ -1,5 +1,9 @@ #!/bin/bash -echo "Stop running docker compose" -cd A2rchi-prod-801/deploy/prod-801/ -docker compose -f prod-801-compose.yaml down +if [ -z "$(ls -A A2rchi-prod-801/deploy/prod-801/)" ]; then + echo "Deployment directory is empty; skipping docker compose down" +else + echo "Stop running docker compose" + cd A2rchi-prod-801/deploy/prod-801/ + docker compose -f prod-801-compose.yaml down +fi diff --git 
a/deploy/prod-meta/prod-meta-compose.yaml b/deploy/prod-meta/prod-meta-compose.yaml index 78c5c36a..d9bb20e8 100644 --- a/deploy/prod-meta/prod-meta-compose.yaml +++ b/deploy/prod-meta/prod-meta-compose.yaml @@ -1,5 +1,5 @@ services: - cleo-prod-meta: + cleo: image: cleo-prod-meta:${TAG} build: context: ../.. @@ -38,7 +38,7 @@ services: max-size: 10m restart: always - mailbox-prod-meta: + mailbox: image: mailbox-prod-meta:${TAG} build: context: ../.. diff --git a/deploy/prod-meta/prod-meta-stop.sh b/deploy/prod-meta/prod-meta-stop.sh index e7ca8bce..6c164e29 100755 --- a/deploy/prod-meta/prod-meta-stop.sh +++ b/deploy/prod-meta/prod-meta-stop.sh @@ -1,5 +1,9 @@ #!/bin/bash -echo "Stop running docker compose" -cd A2rchi-prod-meta/deploy/prod-meta/ -docker compose -f prod-meta-compose.yaml down +if [ -z "$(ls -A A2rchi-prod-meta/deploy/prod-meta/)" ]; then + echo "Deployment directory is empty; skipping docker compose down" +else + echo "Stop running docker compose" + cd A2rchi-prod-meta/deploy/prod-meta/ + docker compose -f prod-meta-compose.yaml down +fi diff --git a/deploy/prod-root/prod-root-compose.yaml b/deploy/prod-root/prod-root-compose.yaml index 320aa17d..9ce612c2 100644 --- a/deploy/prod-root/prod-root-compose.yaml +++ b/deploy/prod-root/prod-root-compose.yaml @@ -1,5 +1,5 @@ services: - chat-prod-root: + chat: image: chat-prod-root:${TAG} build: context: ../.. @@ -7,7 +7,9 @@ services: args: TAG: ${TAG} depends_on: - chromadb-prod-root: + chromadb: + condition: service_healthy + postgres: condition: service_healthy environment: RUNTIME_ENV: prod-root @@ -27,7 +29,7 @@ services: max-size: 10m restart: always - uploader-prod-root: + uploader: image: uploader-prod-root:${TAG} build: context: ../.. 
@@ -35,7 +37,9 @@ services: args: TAG: ${TAG} depends_on: - chromadb-prod-root: + chromadb: + condition: service_healthy + postgres: condition: service_healthy environment: RUNTIME_ENV: prod-root @@ -58,7 +62,7 @@ services: max-size: 10m restart: always - chromadb-prod-root: + chromadb: image: chromadb-prod-root:${TAG} build: context: ../.. @@ -82,7 +86,7 @@ services: start_period: 10s start_interval: 5s - postgres-prod-root: + postgres: image: postgres:16 environment: RUNTIME_ENV: prod-root @@ -92,7 +96,7 @@ services: secrets: - pg_password volumes: - - ./prod-root-init.sql:/docker-entrypoint-initdb.d/init.sql + - ../init.sql:/docker-entrypoint-initdb.d/init.sql - a2rchi-prod-root-pg-data:/var/lib/postgresql/data logging: options: @@ -104,11 +108,31 @@ services: timeout: 5s retries: 5 + grafana: + image: grafana-prod-root:${TAG} + build: + context: ../.. + dockerfile: deploy/dockerfiles/Dockerfile-grafana + depends_on: + postgres: + condition: service_healthy + ports: + - 3003:3000 # host:container + volumes: + - a2rchi-prod-root-grafana-data:/var/lib/grafana + - ../grafana/a2rchi-default-dashboard.json:/var/lib/grafana/dashboards/a2rchi-default-dashboard.json + logging: + options: + max-size: 10m + restart: always + volumes: a2rchi-prod-root-data: external: true a2rchi-prod-root-pg-data: external: true + a2rchi-prod-root-grafana-data: + external: true secrets: flask_uploader_app_secret_key: diff --git a/deploy/prod-root/prod-root-init.sql b/deploy/prod-root/prod-root-init.sql deleted file mode 100644 index 90d49c61..00000000 --- a/deploy/prod-root/prod-root-init.sql +++ /dev/null @@ -1,19 +0,0 @@ -CREATE TABLE IF NOT EXISTS conversations ( - conversation_id INTEGER NOT NULL, - message_id SERIAL, - sender TEXT NOT NULL, - content TEXT NOT NULL, - ts TIMESTAMP NOT NULL, - PRIMARY KEY (message_id) -); -CREATE TABLE IF NOT EXISTS feedback ( - mid INTEGER NOT NULL, - feedback_ts TIMESTAMP NOT NULL, - feedback TEXT NOT NULL, - feedback_msg TEXT, - incorrect BOOLEAN, - 
unhelpful BOOLEAN, - inappropriate BOOLEAN, - PRIMARY KEY (mid, feedback_ts), - FOREIGN KEY (mid) REFERENCES conversations(message_id) -); \ No newline at end of file diff --git a/deploy/prod-root/prod-root-install.sh b/deploy/prod-root/prod-root-install.sh index c915b171..6c3a1f6b 100644 --- a/deploy/prod-root/prod-root-install.sh +++ b/deploy/prod-root/prod-root-install.sh @@ -12,6 +12,18 @@ if [[ $exists != 'a2rchi-prod-root-pg-data' ]]; then docker volume create --name a2rchi-prod-root-pg-data fi +# create volume if it doesn't already exist for grafana data +exists=`docker volume ls | awk '{print $2}' | grep a2rchi-prod-root-grafana-data` +if [[ $exists != 'a2rchi-prod-root-grafana-data' ]]; then + docker volume create --name a2rchi-prod-root-grafana-data +fi + +# fill-in variables in grafana files +export grafanapass=`cat A2rchi-prod-root/deploy/prod-root/secrets/grafana_password.txt` +sed -i 's/GRAFANA_PASSWORD/'"${grafanapass}"'/g' A2rchi-prod-root/deploy/grafana/datasources.yaml +sed -i 's/GRAFANA_PASSWORD/'"${grafanapass}"'/g' A2rchi-prod-root/deploy/init.sql +unset grafanapass + # build base image; try to reuse previously built image cd A2rchi-prod-root/deploy/prod-root/ docker build -f ../dockerfiles/Dockerfile-base -t a2rchi-base:BASE_TAG ../.. 
@@ -21,10 +33,4 @@ echo "Starting docker compose" docker compose -f prod-root-compose.yaml up -d --build --force-recreate --always-recreate-deps # # secrets files are created by CI pipeline and destroyed here -# rm secrets/cleo_*.txt -# rm secrets/imap_*.txt -# rm secrets/sender_*.txt -# rm secrets/flask_uploader_app_secret_key.txt -# rm secrets/uploader_salt.txt -# rm secrets/openai_api_key.txt -# rm secrets/hf_token.txt +# rm secrets/*.txt diff --git a/deploy/prod-root/prod-root-stop.sh b/deploy/prod-root/prod-root-stop.sh index ce164c69..c161a976 100644 --- a/deploy/prod-root/prod-root-stop.sh +++ b/deploy/prod-root/prod-root-stop.sh @@ -1,5 +1,9 @@ #!/bin/bash -echo "Stop running docker compose" -cd A2rchi-prod-root/deploy/prod-root/ -docker compose -f prod-root-compose.yaml down \ No newline at end of file +if [ -z "$(ls -A A2rchi-prod-root/deploy/prod-root/)" ]; then + echo "Deployment directory is empty; skipping docker compose down" +else + echo "Stop running docker compose" + cd A2rchi-prod-root/deploy/prod-root/ + docker compose -f prod-root-compose.yaml down +fi diff --git a/deploy/prod/prod-compose.yaml b/deploy/prod/prod-compose.yaml index eb7b8fba..850483cd 100644 --- a/deploy/prod/prod-compose.yaml +++ b/deploy/prod/prod-compose.yaml @@ -1,5 +1,5 @@ services: - cleo-prod: + cleo: image: cleo-prod:${TAG} build: context: ../.. @@ -7,7 +7,9 @@ services: args: TAG: ${TAG} depends_on: - chromadb-prod: + chromadb: + condition: service_healthy + postgres: condition: service_healthy environment: RUNTIME_ENV: prod @@ -41,7 +43,7 @@ services: max-size: 10m restart: always - chat-prod: + chat: image: chat-prod:${TAG} build: context: ../.. 
@@ -49,7 +51,9 @@ services: args: TAG: ${TAG} depends_on: - chromadb-prod: + chromadb: + condition: service_healthy + postgres: condition: service_healthy environment: RUNTIME_ENV: prod @@ -69,7 +73,7 @@ services: - 7681:7861 # host:container restart: always - mailbox-prod: + mailbox: image: mailbox-prod:${TAG} build: context: ../.. @@ -77,7 +81,9 @@ services: args: TAG: ${TAG} depends_on: - chromadb-prod: + chromadb: + condition: service_healthy + postgres: condition: service_healthy environment: RUNTIME_ENV: prod @@ -113,7 +119,7 @@ services: max-size: 10m restart: always - uploader-prod: + uploader: image: uploader-prod:${TAG} build: context: ../.. @@ -121,7 +127,9 @@ services: args: TAG: ${TAG} depends_on: - chromadb-prod: + chromadb: + condition: service_healthy + postgres: condition: service_healthy environment: RUNTIME_ENV: prod @@ -143,7 +151,7 @@ services: max-size: 10m restart: always - chromadb-prod: + chromadb: image: chromadb-prod:${TAG} build: context: ../.. @@ -167,7 +175,7 @@ services: start_period: 10s start_interval: 5s - postgres-prod: + postgres: image: postgres:16 environment: RUNTIME_ENV: prod @@ -177,7 +185,7 @@ services: secrets: - pg_password volumes: - - ./prod-init.sql:/docker-entrypoint-initdb.d/init.sql + - ../init.sql:/docker-entrypoint-initdb.d/init.sql - a2rchi-prod-pg-data:/var/lib/postgresql/data logging: options: @@ -189,11 +197,31 @@ services: timeout: 5s retries: 5 + grafana: + image: grafana-prod:${TAG} + build: + context: ../.. 
+ dockerfile: deploy/dockerfiles/Dockerfile-grafana + depends_on: + postgres: + condition: service_healthy + ports: + - 3000:3000 # host:container + volumes: + - a2rchi-prod-grafana-data:/var/lib/grafana + - ../grafana/a2rchi-default-dashboard.json:/var/lib/grafana/dashboards/a2rchi-default-dashboard.json + logging: + options: + max-size: 10m + restart: always + volumes: a2rchi-prod-data: external: true a2rchi-prod-pg-data: external: true + a2rchi-prod-grafana-data: + external: true secrets: imap_user: diff --git a/deploy/prod/prod-init.sql b/deploy/prod/prod-init.sql deleted file mode 100644 index 90d49c61..00000000 --- a/deploy/prod/prod-init.sql +++ /dev/null @@ -1,19 +0,0 @@ -CREATE TABLE IF NOT EXISTS conversations ( - conversation_id INTEGER NOT NULL, - message_id SERIAL, - sender TEXT NOT NULL, - content TEXT NOT NULL, - ts TIMESTAMP NOT NULL, - PRIMARY KEY (message_id) -); -CREATE TABLE IF NOT EXISTS feedback ( - mid INTEGER NOT NULL, - feedback_ts TIMESTAMP NOT NULL, - feedback TEXT NOT NULL, - feedback_msg TEXT, - incorrect BOOLEAN, - unhelpful BOOLEAN, - inappropriate BOOLEAN, - PRIMARY KEY (mid, feedback_ts), - FOREIGN KEY (mid) REFERENCES conversations(message_id) -); \ No newline at end of file diff --git a/deploy/prod/prod-install.sh b/deploy/prod/prod-install.sh index f6c1410d..bc3f9871 100755 --- a/deploy/prod/prod-install.sh +++ b/deploy/prod/prod-install.sh @@ -12,6 +12,18 @@ if [[ $exists != 'a2rchi-prod-pg-data' ]]; then docker volume create --name a2rchi-prod-pg-data fi +# create volume if it doesn't already exist for grafana data +exists=`docker volume ls | awk '{print $2}' | grep a2rchi-prod-grafana-data` +if [[ $exists != 'a2rchi-prod-grafana-data' ]]; then + docker volume create --name a2rchi-prod-grafana-data +fi + +# fill-in variables in grafana files +export grafanapass=`cat A2rchi-prod/deploy/prod/secrets/grafana_password.txt` +sed -i 's/GRAFANA_PASSWORD/'"${grafanapass}"'/g' A2rchi-prod/deploy/grafana/datasources.yaml +sed -i 
's/GRAFANA_PASSWORD/'"${grafanapass}"'/g' A2rchi-prod/deploy/init.sql +unset grafanapass + # build base image; try to reuse previously built image cd A2rchi-prod/deploy/prod/ docker build -f ../dockerfiles/Dockerfile-base -t a2rchi-base:BASE_TAG ../.. @@ -21,10 +33,4 @@ echo "Starting docker compose" docker compose -f prod-compose.yaml up -d --build --force-recreate --always-recreate-deps # # secrets files are created by CI pipeline and destroyed here -# rm secrets/cleo_*.txt -# rm secrets/imap_*.txt -# rm secrets/sender_*.txt -# rm secrets/flask_uploader_app_secret_key.txt -# rm secrets/uploader_salt.txt -# rm secrets/openai_api_key.txt -# rm secrets/hf_token.txt +# rm secrets/*.txt diff --git a/deploy/prod/prod-stop.sh b/deploy/prod/prod-stop.sh index 28183d4a..be068528 100755 --- a/deploy/prod/prod-stop.sh +++ b/deploy/prod/prod-stop.sh @@ -1,5 +1,9 @@ #!/bin/bash -echo "Stop running docker compose" -cd A2rchi-prod/deploy/prod/ -docker compose -f prod-compose.yaml down +if [ -z "$(ls -A A2rchi-prod/deploy/prod/)" ]; then + echo "Deployment directory is empty; skipping docker compose down" +else + echo "Stop running docker compose" + cd A2rchi-prod/deploy/prod/ + docker compose -f prod-compose.yaml down +fi diff --git a/deploy/vanilla/compose.yaml b/deploy/vanilla/compose.yaml new file mode 100644 index 00000000..2e067d0e --- /dev/null +++ b/deploy/vanilla/compose.yaml @@ -0,0 +1,58 @@ +services: + chat: + image: chat:latest + build: + context: ../.. + dockerfile: deploy/dockerfiles/Dockerfile-chat + depends_on: + chromadb: + condition: service_healthy + environment: + RUNTIME_ENV: dev + OPENAI_API_KEY_FILE: /run/secrets/openai_api_key + HUGGING_FACE_HUB_TOKEN_FILE: /run/secrets/hf_token + secrets: + - openai_api_key + - hf_token + volumes: + - a2rchi-data:/root/data/ + logging: + options: + max-size: 10m + ports: + - 7861:7861 # host:container + restart: always + + chromadb: + image: chromadb:latest + build: + context: ../.. 
+ dockerfile: deploy/dockerfiles/Dockerfile-chroma + environment: + RUNTIME_ENV: dev + ports: + - 8000:8000 # host:container + volumes: + - a2rchi-data:/chroma/chroma/ + logging: + options: + max-size: 10m + restart: always + # healthcheck originates from inside container; so use container port + healthcheck: + test: ["CMD", "curl", "-f", "http://0.0.0.0:8000/api/v1/heartbeat"] + interval: 15s + timeout: 10s + retries: 3 + start_period: 10s + start_interval: 5s + +volumes: + a2rchi-data: + external: true + +secrets: + openai_api_key: + file: secrets/openai_api_key.txt + hf_token: + file: secrets/hf_token.txt diff --git a/deploy/vanilla/install.sh b/deploy/vanilla/install.sh new file mode 100755 index 00000000..0eb05d48 --- /dev/null +++ b/deploy/vanilla/install.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# create volume if it doesn't already exist +exists=`docker volume ls | awk '{print $2}' | grep a2rchi-data` +if [[ $exists != 'a2rchi-data' ]]; then + docker volume create --name a2rchi-data +fi + +# start services +echo "Starting docker compose" +cd deploy/vanilla/ +docker compose -f compose.yaml up -d --build --force-recreate --always-recreate-deps diff --git a/pyproject.toml b/pyproject.toml index 792a6ec5..fe8c9d9e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "A2rchi" +name = "a2rchi" version = "0.1.0" description = "An AI Augmented Research Chat Intelligence (A2rchi)" requires-python = ">=3.7" @@ -37,6 +37,7 @@ dependencies = [ "overrides==7.3.1", "pandas==2.1.0", "peft==0.5.0", + "piazza-api==0.14.0", "posthog==3.0.1", "psycopg2==2.9.9", "pulsar-client==3.2.0", @@ -61,13 +62,13 @@ dependencies = [ [tool.setuptools] packages = [ - "A2rchi", - "A2rchi.bin", - "A2rchi.chains", - "A2rchi.interfaces", - "A2rchi.interfaces.chat_app", - "A2rchi.interfaces.uploader_app", - "A2rchi.utils", + "a2rchi", + "a2rchi.bin", + "a2rchi.chains", + "a2rchi.interfaces", + "a2rchi.interfaces.chat_app", + "a2rchi.interfaces.uploader_app", + 
"a2rchi.utils", ] [build-system] diff --git a/test/test_chains.py b/test/test_chains.py index 384aab86..ec7fd6b5 100644 --- a/test/test_chains.py +++ b/test/test_chains.py @@ -1,6 +1,6 @@ -from A2rchi.chains.chain import Chain -from A2rchi.chains.models import DumbLLM, LlamaLLM, OpenAILLM -from A2rchi.utils.config_loader import Config_Loader +from a2rchi.chains.chain import Chain +from a2rchi.chains.models import DumbLLM, LlamaLLM, OpenAILLM +from a2rchi.utils.config_loader import Config_Loader from langchain.schema import AIMessage, HumanMessage, SystemMessage diff --git a/test/test_interfaces.py b/test/test_interfaces.py index 564d662f..9f893593 100644 --- a/test/test_interfaces.py +++ b/test/test_interfaces.py @@ -1,4 +1,4 @@ -from A2rchi.interfaces.cleo import Cleo +from a2rchi.interfaces.cleo import Cleo from time import time import threading