Skip to content

Commit

Permalink
V1.1: Add speech-to-text (#2)
Browse files Browse the repository at this point in the history
  • Loading branch information
Currie32 authored Aug 21, 2023
1 parent 5ee528d commit afa80b5
Show file tree
Hide file tree
Showing 9 changed files with 294 additions and 69 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ COPY pages pages
# Create a virtual environment and activate it
RUN python -m venv venv
ENV PATH="/app/venv/bin:$PATH"

RUN pip3 install --no-cache-dir -r requirements.txt

# Specify the command to run the app
Expand Down
29 changes: 26 additions & 3 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
import base64
import io

import dash_bootstrap_components as dbc
from dash import Dash, html, page_container
from flask import send_from_directory
from flask import Flask, request, send_from_directory

from footer import footer

server = Flask(__name__)
app = Dash(
__name__,
use_pages=True,
pages_folder="pages",
external_stylesheets=[dbc.icons.BOOTSTRAP, dbc.themes.BOOTSTRAP],
server=server,
)
app.config.suppress_callback_exceptions = True
server = app.server


@server.route("/robots.txt")
Expand Down Expand Up @@ -60,9 +64,28 @@ def serve_sitemap():
footer,
],
)
]
],
)


@server.route("/save_audio_recording", methods=["POST"])
def save_audio_recording():
try:
data = request.get_json()
audio_data = data["audio_data"]
# Decode the Base64 audio data
audio_bytes = base64.b64decode(audio_data)

# Save the audio recording
with io.BytesIO(audio_bytes) as f:
with open("audio_recording.wav", "wb") as audio_file:
audio_file.write(f.read())

return "Audio data received successfully", 200

except Exception:
return "An error occurred", 500


if __name__ == "__main__":
app.run_server(debug=True, port=8080)
14 changes: 11 additions & 3 deletions assets/app.css
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ body {
font-size: 22px;
margin-left: 10px;
}
#button-record-audio {
margin-bottom: 5px;
}
#button-start-conversation {
background-color: #0061F3;
color: white;
Expand Down Expand Up @@ -133,13 +136,18 @@ body {
#title {
margin: 0px;
}
#user-input {
#user-response {
display: flex;
margin: 0px 0px 30px;
width: 100%;
}
#user-input.form-control {
#user-response-text {
height: 80px;
margin-right: 10px;
}
#user-response-text.form-control {
border: 1px solid rgba(7, 76, 179, 0.9);
}
#user-input::placeholder {
#user-response-text::placeholder {
color: #aaa;
}
95 changes: 86 additions & 9 deletions assets/chat_request.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,67 @@
import os
import re
import time
from typing import Dict, List, Tuple

import openai
import requests
from dash import Input, Output, callback, no_update
from tenacity import retry, stop_after_attempt, wait_random_exponential

openai.api_key = os.environ.get("OPENAI_KEY")


def get_assistant_message(messages):
@callback(
    Output("user-response-text", "value", allow_duplicate=True),
    Output("loading", "style", allow_duplicate=True),
    Output("check-for-audio-file", "data", allow_duplicate=True),
    Input("check-for-audio-file", "data"),
    prevent_initial_call=True,
)
def convert_audio_recording_to_text(
    check_for_audio_file: bool,
) -> Tuple[str, Dict[str, str], bool]:
    """
    Convert the audio recording from the user into text using OpenAI's
    Whisper-1 model.

    Params:
        check_for_audio_file: Whether to check for the audio recording file.

    Returns:
        The text of the user's audio recording.
        The style of the loading icons.
        Stop checking for the user's audio recording.
    """

    audio_recording = "audio_recording.wav"

    while check_for_audio_file:
        if os.path.exists(audio_recording):
            # Close the file handle before deleting the file. The previous
            # version leaked the handle and removed the file while it was
            # still open, which fails on Windows.
            with open(audio_recording, "rb") as audio_file:
                transcript = openai.Audio.transcribe("whisper-1", audio_file)
            os.remove(audio_recording)
            message_user = transcript.to_dict()["text"]

            return message_user, {"display": "none"}, False

        # Wait 0.1 seconds before looking for the audio file again
        time.sleep(0.1)

    # This callback declares three Outputs, so the fall-through must
    # return one value per Output (a bare no_update would raise).
    return no_update, no_update, no_update


def get_assistant_message(messages: List[Dict[str, str]]) -> str:
"""
Get and process the assistant's (OpenAI's model) message to continue the conversation.
Params:
messages: The conversation history between the user and the chat model.
Returns:
The message from the assistant.
"""

chat_response = _chat_completion_request(messages)
message_assistant = chat_response.json()["choices"][0]["message"]["content"]

# Remove space before "!" or "?"
Expand All @@ -20,12 +71,26 @@ def get_assistant_message(messages):


@retry(wait=wait_random_exponential(multiplier=1, max=40), stop=stop_after_attempt(3))
def chat_completion_request(messages, model="gpt-3.5-turbo-0613"):
def _chat_completion_request(messages: List[Dict[str, str]]) -> Dict:
"""
Request a response to the user's statement from one of OpenAI's chat models.
Params:
messages: The conversation history between the user and the chat model.
Returns:
A response from OpenAI's model to the user's statement.
"""

headers = {
"Content-Type": "application/json",
"Authorization": "Bearer " + openai.api_key,
}
json_data = {"model": model, "messages": messages, "temperature": 1.5}
json_data = {
"model": "gpt-3.5-turbo-0613",
"messages": messages,
"temperature": 1.5,
}
try:
response = requests.post(
"https://api.openai.com/v1/chat/completions",
Expand All @@ -34,16 +99,28 @@ def chat_completion_request(messages, model="gpt-3.5-turbo-0613"):
)
return response
except Exception as e:
print("Unable to generate ChatCompletion response")
print(f"Exception: {e}")
return e


def system_content(
language_learn, language_known, setting, point_in_conversation="Start"
):
conversation_setting: str,
language_learn: str,
language_known: str,
) -> str:
"""
Write the content message for the system as part of a call to OpenAI's chat completion API.
This provides OpenAI's model with some context about the conversation.
Params:
conversation_setting: The setting of the conversation between the user and OpenAI's model.
language_learn: The language that the user wants to learn.
language_known: The language that the user speaks.
Returns:
The content message for the system.
"""

content = f"{point_in_conversation} a conversation about {setting} in {language_learn}. \
content = f"Start a conversation about {conversation_setting} in {language_learn}. \
Provide one statement in {language_learn}, then wait for my response. \
Do not write in {language_known}. \
Always finish your response with a question. \
Expand Down
2 changes: 1 addition & 1 deletion callbacks/conversation_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def start_conversation_button_disabled(

has_setting = (
(conversation_setting == "other") & (conversation_setting_custom is not None)
) | (conversation_setting != "other")
) | ((conversation_setting != "other") & (conversation_setting is not None))

return not (has_two_languages & has_different_languages & has_setting)

Expand Down
62 changes: 52 additions & 10 deletions callbacks/display_components.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Dict, List, Tuple

from dash import Input, Output, State, callback, callback_context
from dash import Input, Output, State, callback, callback_context, html


@callback(
Expand All @@ -27,7 +27,6 @@ def display_conversation_helpers(
{"display": "block"},
{
"margin": "20px 0px 0px",
"float": "right",
},
)

Expand All @@ -47,29 +46,64 @@ def display_user_input(conversation: List) -> Dict[str, str]:
"""

if conversation:
return {"display": "block"}
return {"display": "flex"}

return {"display": "none"}


@callback(
    Output("button-record-audio", "children"),
    Output("check-for-audio-file", "data", allow_duplicate=True),
    Input("button-record-audio", "n_clicks"),
    prevent_initial_call=True,
)
def is_user_recording_audio(button_record_audio_n_clicks: int) -> Tuple[html.I, bool]:
    """
    Toggle the audio-recording button's icon to reflect whether a
    recording is in progress, and start checking for the recording
    file once it has finished.

    Params:
        button_record_audio_n_clicks: Number of times the button to record the user's audio has been clicked.

    Returns:
        The icon of the button.
        Whether to check for a file of the user's audio recording.
    """

    # Odd click counts mean a recording is currently in progress.
    recording_in_progress = button_record_audio_n_clicks % 2 == 1

    if recording_in_progress:
        # Show the headphones icon; the file does not exist yet.
        return html.I(className="bi bi-headphones"), False

    # Recording just stopped: restore the mic icon and begin polling
    # for the finished audio file.
    return html.I(className="bi bi-mic-fill"), True


@callback(
Output("loading", "style", allow_duplicate=True),
Input("button-start-conversation", "n_clicks"),
Input("user-response", "n_submit"),
State("user-response", "value"),
Input("button-submit-response-text", "n_clicks"),
Input("user-response-text", "n_submit"),
Input("button-record-audio", "n_clicks"),
State("user-response-text", "value"),
prevent_initial_call="initial_duplicate",
)
def loading_visible(
button_start_conversation_n_clicks: int,
user_response_n_submits: int,
button_submit_text_n_clicks: int,
user_response_text_n_submits: int,
user_response_audio_n_clicks: int,
user_response_text: str,
) -> Dict[str, str]:
"""
Whether to make the loading icons visible.
Params:
button_start_conversation_clicks: Number of time the start conversation button was clicked
user_response_n_submits: Number of times the user response was submitted.
button_start_conversation_n_clicks: Number of times the start conversation button was clicked.
button_submit_text_n_clicks: Number of times the button to submit the user's text response was clicked.
user_response_text_n_submits: Number of times the user's text response was submitted (by clicking enter/return).
user_response_audio_n_clicks: Number of times the button to record the user's audio was clicked.
user_response_text: The text of the user_response field when it was submitted.
Returns:
Expand All @@ -83,8 +117,16 @@ def loading_visible(
if button_start_conversation_n_clicks:
return {"display": "flex"}

elif triggered_input_id == "user-response":
if user_response_n_submits is not None and user_response_text:
if triggered_input_id == "button-submit-response-text":
if button_submit_text_n_clicks:
return {"display": "flex"}

elif triggered_input_id == "user-response-text":
if user_response_text_n_submits is not None and user_response_text:
return {"display": "flex"}

elif triggered_input_id == "button-record-audio":
if user_response_audio_n_clicks:
return {"display": "flex"}

return {"display": "none"}
2 changes: 1 addition & 1 deletion callbacks/placeholder_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


@callback(
Output("user-response", "placeholder"),
Output("user-response-text", "placeholder"),
Input("language-known", "value"),
Input("language-learn", "value"),
)
Expand Down
Loading

0 comments on commit afa80b5

Please sign in to comment.