-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathassistant.py
237 lines (190 loc) · 6.67 KB
/
assistant.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import pvporcupine
import pvcobra
import pyaudio
import time
import openai
import struct
import wave
import os
from dotenv import load_dotenv
from elevenlabslib import *
from pymilvus import Milvus, DataType
from sentence_transformers import SentenceTransformer
# Load environment variables from the .env file
load_dotenv()
# Porcupine built-in keyword that wakes the assistant.
WAKE_WORD = 'porcupine'
# Scratch file: record_audio() writes it, transcribe_audio() reads it back.
WAVE_OUTPUT_FILENAME = 'output.wav'
MAX_RECORD_SECONDS = 5  # Maximum recording duration in seconds
# Service credentials and the Milvus endpoint, all read from the environment
# (each is None when the corresponding variable is unset).
PICOVOICE_API_KEY = os.environ.get('PICOVOICE_API_KEY')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
ELEVENLABS_API_KEY = os.environ.get('ELEVENLABS_API_KEY')
MILVUS_HOST = os.environ.get('MILVUS_HOST')
MILVUS_PORT = os.environ.get('MILVUS_PORT')
# Prepare Milvus for long-term storage
milvus = Milvus(host=MILVUS_HOST, port=MILVUS_PORT)
# Sentence embedder intended for the vector fields declared below.
model = SentenceTransformer('all-MiniLM-L6-v2')
# Define schema of the collection
# NOTE(review): all-MiniLM-L6-v2 produces 384-dimensional embeddings, but the
# schema declares dim 768 — confirm before inserting vectors from `model`.
schema = {
    "fields": [
        {"name": "prompt", "type": DataType.STRING},
        {"name": "response", "type": DataType.STRING},
        {"name": "response_vector", "type": DataType.FLOAT_VECTOR,
         "params": {"dim": 768}},
        {"name": "prompt_vector", "type": DataType.FLOAT_VECTOR, "params": {"dim": 768}},
    ],
    "segment_row_limit": 10000,
    "auto_id": True
}
# Default conversation-history collection; parseCommand("Start.") can create
# additional, project-specific collections with the same schema.
collection_name = 'chat_history'
if not milvus.has_collection(collection_name):
    milvus.create_collection(collection_name, schema)
# Create Porcupine instance
porcupine = pvporcupine.create(
    access_key=PICOVOICE_API_KEY, keywords=[WAKE_WORD])
# create Cobra instance
cobra = pvcobra.create(access_key=PICOVOICE_API_KEY)
# Initialize OpenAI API
openai.api_key = OPENAI_API_KEY
CHATGPT_MODEL = "gpt-3.5-turbo"
# Initialize PyAudio
pa = pyaudio.PyAudio()
# Mono 16-bit input stream sized to Porcupine's expected frame length.
stream = pa.open(
    rate=porcupine.sample_rate,
    channels=1,
    format=pyaudio.paInt16,
    input=True,
    frames_per_buffer=porcupine.frame_length,
)
# Initialize recording
# Shared buffer of raw PCM frames; record_audio() appends to it and the main
# loop clears it after each utterance.
frames = []
def record_audio():
    """Record microphone audio into the global ``frames`` buffer and save
    it to WAVE_OUTPUT_FILENAME as a mono 16-bit WAV file.

    Recording stops when either MAX_RECORD_SECONDS elapses or at least two
    seconds have passed and the most recent frame contains no detected
    speech. Reads the module-level ``stream``, ``porcupine``, ``cobra`` and
    ``pa`` objects; callers are expected to clear ``frames`` between
    recordings.
    """
    print('Recording audio...')
    is_speaking = False
    start_time = time.time()
    while True:
        # Read one raw PCM frame from the microphone and unpack it into
        # 16-bit samples for the voice-activity detector.
        pcm = stream.read(porcupine.frame_length)
        cobra_pcm = struct.unpack_from("h" * porcupine.frame_length, pcm)
        # Cobra returns a voice-activity probability in [0, 1].
        voice_activity = cobra.process(cobra_pcm)
        is_speaking = voice_activity > 0.5
        # Keep every frame regardless of voice activity (the original
        # appended in both branches of the if/else). Appending exactly once
        # here also fixes a bug where the final frame was stored twice on
        # the timeout path.
        frames.append(pcm)
        # Check for recording timeout.
        elapsed_time = time.time() - start_time
        if elapsed_time >= MAX_RECORD_SECONDS:
            print('Recording timeout reached.')
            break
        # Stop early once past the 2-second grace period if the current
        # frame holds no speech.
        if elapsed_time >= 2 and not is_speaking:
            print('Silence detected.')
            break
    # Persist the captured frames; the context manager guarantees the file
    # is closed even if a write fails (the original never closed on error).
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(pa.get_sample_size(pyaudio.paInt16))
        wf.setframerate(porcupine.sample_rate)
        wf.writeframes(b''.join(frames))
    print('Recording complete.')
def transcribe_audio():
    """Run the saved recording through OpenAI's Whisper API and return the
    transcription, stripped of surrounding whitespace."""
    print('Transcribing audio...')
    # Stream the WAV file straight to the Whisper endpoint.
    with open(WAVE_OUTPUT_FILENAME, 'rb') as audio_file:
        result = openai.Audio.transcribe("whisper-1", audio_file)
    transcription = result['text']
    print('Transcription:', transcription)
    return transcription.strip()
def send_to_chatgpt(text):
    """Send *text* to the ChatGPT API as a single user message and return
    the assistant's reply, stripped of surrounding whitespace."""
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": text},
    ]
    result = openai.ChatCompletion.create(
        model=CHATGPT_MODEL,
        n=1,
        messages=conversation,
    )
    # Only one choice is requested (n=1); take its message content.
    reply = result['choices'][0]['message']['content']
    print('Response from ChatGPT:', reply)
    return reply.strip()
def synthesize_and_play_audio(text):
    """Speak *text* through the Eleven Labs "Bella" voice, blocking until
    playback finishes, then delete the generated item from account history."""
    user = ElevenLabsUser(ELEVENLABS_API_KEY)
    # get_voices_by_name returns a list because several voices can share a
    # name; use the first match.
    voice = user.get_voices_by_name("Bella")[0]
    voice.generate_and_play_audio(text, playInBackground=False)
    # History is ordered newest-first, so the first item whose text matches
    # is the one just generated; remove it and stop scanning.
    for item in user.get_history_items():
        if item.text == text:
            item.delete()
            break
# Parse Commands
def parseCommand(text):
commands = ["Persona", "Help", "Quit", "Start"]
match text:
case "Help.":
synthesize_and_play_audio(
"Available commands are: Persona, Help, Quit, Start")
case "Persona.":
return
case "Quit.":
return
case "Start.":
synthesize_and_play_audio(
"Please give a name for the project you want to create.")
record_audio()
text = transcribe_audio()
text.replace(" ", "_")
text.replace(".", "")
text.replace("?", "")
collection_name = text
if not milvus.has_collection(collection_name):
milvus.create_collection(collection_name, schema)
try:
    # Main loop: wait for the wake word, record and transcribe an utterance,
    # run command dispatch, then speak ChatGPT's reply.
    print("Listening for wake word '{}'...".format(WAKE_WORD))
    while True:
        # Read one frame and unpack it into 16-bit samples for Porcupine.
        pcm = stream.read(porcupine.frame_length)
        pcm = struct.unpack_from("h" * porcupine.frame_length, pcm)
        # Porcupine returns the index of the detected keyword; with a single
        # keyword configured, 0 means the wake word was heard.
        result = porcupine.process(pcm)
        if result == 0:
            print("Wake word detected!")
            record_audio()
            text = transcribe_audio()
            if text == 'Quit.':
                break
            parseCommand(text)
            # Commands fall through: the transcription is still sent to
            # ChatGPT and the reply is spoken aloud.
            response_text = send_to_chatgpt(text)
            synthesize_and_play_audio(response_text)
            # Re-open the input stream after playback. Close the old stream
            # first — the original rebinding leaked the previous stream.
            stream.stop_stream()
            stream.close()
            stream = pa.open(
                rate=porcupine.sample_rate,
                channels=1,
                format=pyaudio.paInt16,
                input=True,
                frames_per_buffer=porcupine.frame_length,
            )
            # Clear the shared recording buffer for the next utterance.
            frames = []
            # Start listening for wake word again.
            print("Listening for wake word '{}'...".format(WAKE_WORD))
except KeyboardInterrupt:
    print("Stopping...")
finally:
    # Release audio and engine resources on the way out.
    stream.stop_stream()
    stream.close()
    pa.terminate()
    porcupine.delete()
    cobra.delete()
    milvus.close()