-
Notifications
You must be signed in to change notification settings - Fork 0
/
voiceassistant.py
144 lines (130 loc) · 5.05 KB
/
voiceassistant.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import os
# environment variables from .env file
import dotenv
# proper log messages as output
import logging
# speech to text module
import speech_recognition as sr
# text to speech module
import pyttsx3
# openai api module to ask chatgpt
import openai
# ----------- configure and init modules -----------
# log lines carry a timestamp and a padded level name; INFO and above are shown
logging.basicConfig(
    format='%(asctime)s %(levelname)-8s %(message)s',
    level=logging.INFO
)
# load environment variables (e.g. OPENAI_API_KEY) from the .env file
dotenv.load_dotenv()
# speech recognizer used to capture and transcribe the microphone input
r = sr.Recognizer()
# configure used openai model
openai_model = "gpt-3.5-turbo"
# openai api key should be stored in .env file next to this file with OPENAI_API_KEY=the-key
openai.api_key = os.getenv('OPENAI_API_KEY')
# text to speech engine used for every spoken answer
engine = pyttsx3.init()
# ----------- configure voice assistant -----------
# activation name (lower case, because the recognized text is lower-cased before comparing)
assistant_name = "alberto"
# timeout in seconds per listening round when there is no noise
# (passed as the timeout argument of r.listen)
timeout_listen_per_round = 10
# minimal seconds listened on input (the activation name must fit into it);
# this is the base of the phrase limit handed to r.listen
min_timeout_listen_on_voice = 5
# additional seconds listened on input per listening mode step:
# limit = listening_mode * max_timeout_factor_listen_on_voice + min_timeout_listen_on_voice
max_timeout_factor_listen_on_voice = 5
# when 1 no openai api call is made; the fixed test_message is returned instead
test_mode = 0
# ----------- Language and keyword config -----------
# output speech - language
engine.setProperty("voice", "german")
# input speech - language (passed as language to r.recognize_google)
speech_to_text_lang = "de-DE"
# Message spoken when activated by name
confirm_listening_text = "Ja ich höre"
# assistant_name + this string signals stop listening
end_keyword = " ende"
# Message spoken when deactivated
confirm_stop_text = "Es war mir eine Ehre zu dienen"
# Trigger long question mode
long_input_keyword = "lange frage"
# Message spoken when long question mode is activated
confirm_long_input_text = "OK ich höre dir länger zu"
# Trigger program exit
program_exit_keyword = "beende dich"
# Message spoken when the program exits
confirm_program_exit_text = "Lebe lang und in Frieden"
# key word to bring the running assistant into test mode
test_mode_keyword = "testmodus"
# the fixed message returned instead of an openai answer in test mode
test_message = "Das ist nur eine Testausgabe"
# Function to convert text to speech
def speak_text(command):
    """Speak *command* aloud through the module-level text to speech engine.

    The text is queued on the engine and the call blocks until
    ``runAndWait`` returns.
    """
    tts = engine
    tts.say(command)
    tts.runAndWait()
# Function for invoking openai model
def get_openai_response(prompt):
    """Return the answer text for *prompt*.

    When ``test_mode`` is 1 no API request is made and the fixed
    ``test_message`` is returned. Otherwise *prompt* is sent as a single
    user message to the chat model named by ``openai_model`` and the
    message content of the first returned choice is handed back.
    """
    if test_mode != 1:
        user_message = {
            "role": "user",
            "content": prompt,
        }
        reply = openai.chat.completions.create(
            model=openai_model,
            messages=[user_message],
        )
        return reply.choices[0].message.content
    # test mode: skip the API call entirely
    return test_message
# ----------- main loop -----------
# listening_mode values used below:
#   0 = asleep, only the activation name is accepted
#   1 = active, every other input is forwarded to the openai model
#   3 = active with an extended phrase limit (long question mode)
logging.info("Voice Assistant is listening...")
listening_mode = 0
running = True
while running:
    try:
        # use the microphone as source for input; it is only needed while capturing
        with sr.Microphone() as source_mic:
            # adjust the energy threshold based on
            # the surrounding noise level
            logging.info("adjust noise")
            r.adjust_for_ambient_noise(source_mic, duration=0.2)
            # listens for the user's input; the phrase limit grows with the listening mode
            logging.info("Listening in mode %s", listening_mode)
            captured_audio = r.listen(
                source_mic,
                timeout=timeout_listen_per_round,
                phrase_time_limit=(
                    listening_mode * max_timeout_factor_listen_on_voice
                    + min_timeout_listen_on_voice
                ),
            )
        logging.info("audio captured")
        # Using google to recognize audio
        user_input = r.recognize_google(captured_audio, language=speech_to_text_lang)
        logging.info("Captured text:%s", user_input)
        # keywords are configured in lower case, so compare case-insensitively
        command = user_input.lower()
        if listening_mode == 0:
            # asleep: react to nothing but the activation name
            if command == assistant_name:
                logging.info("Voice Assistant activated")
                speak_text(confirm_listening_text)
                listening_mode = 1
                # every activation starts with real openai calls again
                test_mode = 0
        elif command == assistant_name + end_keyword:
            logging.info("Voice Assistant sleeps...")
            speak_text(confirm_stop_text)
            listening_mode = 0
        elif command == program_exit_keyword:
            logging.info("Voice Assistant exiting...")
            speak_text(confirm_program_exit_text)
            running = False
        elif command == long_input_keyword:
            logging.info("Voice Assistant extending input length")
            speak_text(confirm_long_input_text)
            listening_mode = 3
        elif command == test_mode_keyword:
            logging.info("Voice Assistant going to test mode")
            # the keyword itself is echoed as confirmation
            speak_text(test_mode_keyword)
            test_mode = 1
        else:
            # anything else is treated as a question for the model
            logging.info("Asking %s", openai_model)
            openai_response = get_openai_response(user_input)
            logging.info("text to speech:%s", openai_response)
            speak_text(openai_response)
    except sr.WaitTimeoutError:
        # nobody spoke within timeout_listen_per_round seconds; without this
        # handler the assistant would terminate after the first silent round
        logging.info("No speech detected, listening again")
    except sr.RequestError as e:
        logging.error("Could not request results; {0}".format(e))
    except sr.UnknownValueError as e:
        # raised when the captured audio could not be transcribed
        logging.error("unknown error: {0}".format(e))
    except openai.OpenAIError as e:
        # a failed model request must not end the assistant; log it and keep listening
        logging.error("OpenAI request failed: {0}".format(e))