From a6306e230fff4cf28106e87925514d40a18a2b74 Mon Sep 17 00:00:00 2001 From: Aarav Borthakur Date: Mon, 24 Jul 2023 18:13:30 -0700 Subject: [PATCH] Add web server --- .github/workflows/store-embeddings.yml | 2 +- README.md | 22 ++++++++++ constants.py | 56 ++++++++++++-------------- main.py | 20 +++++++++ requirements.txt | 1 + generate_links.py => scripts/ingest.py | 43 +++++++++++++------- search.py | 20 +++++---- static/index.js | 55 +++++++++++++++++++++++++ static/styles.css | 32 +++++++++++++++ templates/index.jinja | 49 ++++++++++++++++++++++ 10 files changed, 246 insertions(+), 54 deletions(-) create mode 100644 README.md create mode 100644 main.py rename generate_links.py => scripts/ingest.py (86%) create mode 100644 static/index.js create mode 100644 static/styles.css create mode 100644 templates/index.jinja diff --git a/.github/workflows/store-embeddings.yml b/.github/workflows/store-embeddings.yml index 46064ea..704c3a4 100644 --- a/.github/workflows/store-embeddings.yml +++ b/.github/workflows/store-embeddings.yml @@ -13,7 +13,7 @@ jobs: - name: Install requirements run: pip install -r requirements.txt - name: Generate links - run: python3 generate_links.py --reset + run: python3 scripts/ingest.py --reset env: ROCKSET_API_KEY: ${{ secrets.ROCKSET_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..83ff360 --- /dev/null +++ b/README.md @@ -0,0 +1,22 @@ +# Zelda GPT + +## Setup +```bash +export ROCKSET_API_KEY="" +export ROCKSET_API_SERVER="" +export OPENAI_API_KEY="" +pip3 install -r requirements.txt +``` + +## Data ingestion +``` +python3 scripts/ingest.py +``` + +## Starting the server +```bash +python3 main.py +``` + +## Deployment +See [Heroku docs](https://devcenter.heroku.com/articles/github-integration#manual-deploys). 
\ No newline at end of file diff --git a/constants.py b/constants.py index 2a6e995..0681326 100644 --- a/constants.py +++ b/constants.py @@ -1,45 +1,39 @@ -from sys import argv -from time import sleep from os import getenv -from rockset import RocksetClient, Regions, exceptions +from rockset import RocksetClient, exceptions from langchain.embeddings import OpenAIEmbeddings from langchain.vectorstores import Rockset as RocksetStore from sql import ingest_tranformation +rockset_api_server = getenv("ROCKSET_API_SERVER") rockset_api_key = getenv("ROCKSET_API_KEY") openai_api_key = getenv("OPENAI_API_KEY") -rockset = RocksetClient(Regions.rs2, rockset_api_key) +rockset = RocksetClient(rockset_api_server, rockset_api_key) -def collection_exists(): - try: - rockset.Collections.get(collection="hyrule-compendium-ai") - except exceptions.NotFoundException: - return False - return True +class Collection: + def __init__(self, workspace, name): + self.workspace = workspace + self.name = name + + def exists(self): + try: + rockset.Collections.get(collection=self.name) + except exceptions.NotFoundException: + return False + return True -def collection_is_ready(): - return rockset.Collections.get(collection="hyrule-compendium-ai").data.status == "READY" + def is_ready(self): + return rockset.Collections.get(collection=self.name).data.status == "READY" -def delete_collection(): - print("Deleting collection \"commons.hyrule-compendium-ai\"") - rockset.Collections.delete(collection="hyrule-compendium-ai") - -def create_collection(): - print("Creating collection \"commons.hyrule-compendium-ai\"") - rockset.Collections.create_s3_collection(name="hyrule-compendium-ai", field_mapping_query=ingest_tranformation) + def delete(self): + print(f"Deleting collection \"{self.workspace}.{self.name}\"") + rockset.Collections.delete(collection=self.name) + + def create(self): + print(f"Creating collection \"{self.workspace}.{self.name}\"") + rockset.Collections.create_s3_collection(name=self.name, 
field_mapping_query=ingest_tranformation) -if "--reset" in argv: - if collection_exists(): - delete_collection() - while collection_exists(): - sleep(1) - - create_collection() - while not collection_exists(): - sleep(1) - while not collection_is_ready(): - sleep(1) +collection = Collection("commons", "hyrule-compendium-ai") openai = OpenAIEmbeddings( openai_api_key=openai_api_key, @@ -48,7 +42,7 @@ def create_collection(): store = RocksetStore( rockset, openai, - "hyrule-compendium-ai", + collection.name, "text", "embedding" ) \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..219228b --- /dev/null +++ b/main.py @@ -0,0 +1,20 @@ +from flask import Flask, render_template +from flask_socketio import SocketIO, send +from search import ask + +app = Flask(__name__) +app.config['SECRET_KEY'] = 'secret!' +socketio = SocketIO(app) + +@app.route("/") +def main(): + return render_template("index.jinja") + +@socketio.on("message") +def handle_message(question): + print('received question: ' + question) + send(ask(question)) + + +if __name__ == '__main__': + socketio.run(app, debug=True) diff --git a/requirements.txt b/requirements.txt index 0f02068..4c14be0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,3 +34,4 @@ typing-inspect==0.9.0 typing_extensions==4.7.1 urllib3==1.26.16 yarl==1.9.2 +eventlet==0.30.2 \ No newline at end of file diff --git a/generate_links.py b/scripts/ingest.py similarity index 86% rename from generate_links.py rename to scripts/ingest.py index 06e669e..9ac66d9 100644 --- a/generate_links.py +++ b/scripts/ingest.py @@ -1,9 +1,21 @@ -from requests import get, exceptions -from bs4 import BeautifulSoup +from sys import argv +from time import sleep from requests import get, exceptions from bs4 import BeautifulSoup from langchain.text_splitter import RecursiveCharacterTextSplitter -from constants import store, rockset as rs +from constants import store, collection, rockset as rs + +if "--reset" in 
argv: + if collection.exists(): + collection.delete() + while collection.exists(): + sleep(1) + + collection.create() + while not collection.exists(): + sleep(1) + while not collection.is_ready(): + sleep(1) text_splitter = RecursiveCharacterTextSplitter( chunk_size = 1000, @@ -22,6 +34,14 @@ def __init__(self, init_value=None): self.first = LinkNode(init_value, None) if init_value is not None else None self.last = self.first + def _add(self, link): + node = LinkNode(link) + if self.first is None and self.last is None: # empty queue + self.first = node + else: + self.last.next = node + self.last = node + def remove(self): if self.first is self.last: # one item in queue link = self.first.link @@ -31,21 +51,13 @@ def remove(self): prev_first = self.first self.first = self.first.next return prev_first.link - - def add(self, link): - node = LinkNode(link) - if self.first is None and self.last is None: # empty queue - self.first = node - else: - self.last.next = node - self.last = node def is_empty(self): return self.first is None def add_elem_links(self, a_elems): for i in a_elems: - self.add(i["href"]) + self._add(i["href"]) def __str__(self) -> str: if self.is_empty(): @@ -97,11 +109,14 @@ def _is_category(self, link): def _scrape(self, link): soup = BeautifulSoup(get(link).text, "html.parser") - if self._is_category(link): # we do not need to generate embeddings for this page + if self._is_category(link): + # we do not need to generate embeddings for this page, + # but we still need to add it to the collection to + # make sure we don't scrape it again rs.Documents.add_documents( collection="hyrule-compendium-ai", data=[{ - "source": link, # make sure we do not scrape this page again + "source": link, "embedding": None }] ) diff --git a/search.py b/search.py index 23d652c..c431312 100644 --- a/search.py +++ b/search.py @@ -3,21 +3,25 @@ from langchain.chat_models import ChatOpenAI from langchain.chains import RetrievalQA -chat_bot = 
ChatOpenAI(model_name="gpt-3.5-turbo-0613", temperature=0.99) +chat_bot = ChatOpenAI(model_name="gpt-3.5-turbo-0613", temperature=0.8) chat_bot( [ - SystemMessage(content='''The context I have you is about the "Legend of Zelda" series. The questions I am about to ask you are about this game series. Use the context given and your knowledge about the games to answer my questions. You are a Goddess that knows everything about the fictional world of Hyrule.''') + SystemMessage(content='''The context I have you is about the "Legend of Zelda" series. The questions I am about to ask you are about this game series. Use the context given and your knowledge about the games to answer my questions.''') ] ) -qa_chain = RetrievalQA.from_chain_type(chat_bot, retriever=store.as_retriever()) + +retriever = store.as_retriever() +retriever.search_kwargs = {"where_str": "embedding IS NOT NULL"} + +qa_chain = RetrievalQA.from_chain_type( + chat_bot, + retriever=retriever +) def ask(question): - if question: - print( - qa_chain({"query": question})["result"] - ) + return qa_chain({"query": f"""Answer my question and be sure to make your answer as long as possible. 
Question: {question}"""})["result"] if __name__ == "__main__": while True: - ask(input("question: ")) \ No newline at end of file + print(ask(input("question: "))) \ No newline at end of file diff --git a/static/index.js b/static/index.js new file mode 100644 index 0000000..e24e8aa --- /dev/null +++ b/static/index.js @@ -0,0 +1,55 @@ +var socket = io(); + +socket.on("connect", () => { + let askBtn = document.getElementById("ask"); + let questionBox = document.getElementById("question"); + let answerContainer = document.getElementById("answer-container"); + let thinkingMsg = document.getElementById("thinking"); + let responseBox = document.getElementById("response"); + + let ask = () => { + let question = document.getElementById("question").value; + socket.send(question); + thinkingMsg.classList.remove("hidden"); + responseBox.classList.add("hidden"); + answerContainer.classList.remove("hidden"); + }; + + let answer = (response) => { + responseBox.innerText = response; + thinkingMsg.classList.add("hidden"); + responseBox.classList.remove("hidden"); + }; + + + questionBox.addEventListener("input", () => { + if (questionBox.value) { + askBtn.removeAttribute("disabled") + } else { + askBtn.setAttribute("disabled", "") + } + }); + + questionBox.addEventListener("keypress", (event) => { + if (event.key === "Enter") { + event.preventDefault(); + ask(); + } + }); + + askBtn.addEventListener("click", ask); + + socket.on("message", answer); + + console.log("Connected!"); +}); + +var ellipses = document.getElementById("ellipses"); + +setInterval(() => { + if (ellipses.innerText.length == 2) { + ellipses.innerText = ""; + } else { + ellipses.innerText += "."; + } +}, 1000); \ No newline at end of file diff --git a/static/styles.css b/static/styles.css new file mode 100644 index 0000000..0470f5a --- /dev/null +++ b/static/styles.css @@ -0,0 +1,32 @@ +.input-group, #answer-container { + max-width: 75%; +} + +h1.gold { + color: gold; +} + +button.gold { + background-color: gold 
+} + +button.gold:disabled { + background-color: lightgoldenrodyellow +} + +button.gold:hover { + background-color: gold +} + +.hidden { + display: none; +} + +.showing { + display: block; +} + +#star { + text-align:right; + margin-right: 8px; +} \ No newline at end of file diff --git a/templates/index.jinja b/templates/index.jinja new file mode 100644 index 0000000..a363260 --- /dev/null +++ b/templates/index.jinja @@ -0,0 +1,49 @@ + + + ZeldaGPT + + + + +
+ Star +
+ +
+

+ ZeldaGPT +

+
+ +
+ + + +
+ + + + + +
+

Built with ❤️ by Aarav Borthakur

+
+ + + + + + \ No newline at end of file