-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvoice recognition.py
119 lines (102 loc) · 3.97 KB
/
voice recognition.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import speech_recognition as sr
import pyaudio
import pyautogui
import time
import webbrowser
import win32com.client
from google.cloud import speech_v1p1beta1 as speech
import os
import base64
import threading
# Create a PyAudio instance for audio input.
# NOTE(review): nothing in the visible file reads this module-level `audio`
# object, and it is never terminated -- confirm whether it is still needed.
audio = pyaudio.PyAudio()
# Point the Google client library at the service-account key file, then build
# the Speech client. The variable is assigned before SpeechClient() is
# constructed (the client is expected to read it at construction time), so the
# order of the next two statements matters.
# NOTE(review): the key path is relative (presumably resolved against the
# current working directory) and unconditionally overwrites any value already
# present in the environment -- confirm this is intended.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "agile-infinity-419609-0d76c5aa6ca2.json"
client = speech.SpeechClient()
# Define the command actions
def enable_bluetooth():
    """Send an F15 keystroke through the Windows Script Host shell.

    NOTE(review): presumably F15 is bound to an external hotkey that switches
    Bluetooth on; nothing in this file defines that binding -- confirm.
    """
    win32com.client.Dispatch("WScript.Shell").SendKeys('{F15}')
def disable_bluetooth():
    """Send an F16 keystroke through the Windows Script Host shell.

    NOTE(review): presumably F16 is bound to an external hotkey that switches
    Bluetooth off; nothing in this file defines that binding -- confirm.
    """
    win32com.client.Dispatch("WScript.Shell").SendKeys('{F16}')
def open_gmail():
    """Open the Gmail web client through the ``webbrowser`` module."""
    gmail_url = "https://mail.google.com"
    webbrowser.open(gmail_url)
def open_weather():
    """Open weather.com through the ``webbrowser`` module."""
    weather_url = "https://www.weather.com"
    webbrowser.open(weather_url)
def quit_process():
    """Announce shutdown and clear the module-level ``running`` flag.

    The listening loop tests ``running`` on every iteration, so clearing it
    asks that loop to exit.
    """
    global running
    print("Quitting process...")
    running = False
# Dispatch table: recognised phrase -> zero-argument handler.
# Phrases that trigger the same handler are grouped on one line; the resulting
# dict has the same keys, in the same order, as a flat literal would.
command_actions = {
    **dict.fromkeys(("open mail", "check mail", "view mail"), open_gmail),
    **dict.fromkeys(("open weather", "check weather", "view weather"), open_weather),
    **dict.fromkeys(("enable bluetooth", "turn on bluetooth"), enable_bluetooth),
    **dict.fromkeys(("disable bluetooth", "turn off bluetooth"), disable_bluetooth),
    **dict.fromkeys(("quit", "stop"), quit_process),
}
# Loop-control flag: listen_for_speech keeps capturing audio while this is
# True; quit_process and stop_process set it to False.
running: bool = True
# Transcription gate: process_audio only transcribes while this is True.
processing: bool = True
# Function to process audio
def process_audio(audio_data):
    """Transcribe one captured utterance and run the matching command.

    The audio is converted to 16 kHz, 16-bit PCM, sent to the Google Cloud
    Speech-to-Text client, and the first alternative of the first result is
    lower-cased, stripped and looked up in ``command_actions``.

    This function is used as a worker-thread target, so every failure is
    reported here instead of escaping and killing the thread with a bare
    traceback.

    Args:
        audio_data: Object exposing ``get_raw_data(convert_rate=...,
            convert_width=...)``, such as the value returned by
            ``Recognizer.listen``.

    Returns:
        None. Does nothing when the module-level ``processing`` flag is False.
    """
    # ``processing`` is only read here, so no ``global`` declaration is needed.
    if not processing:
        return

    try:
        # Raw PCM in exactly the format declared in the config below.
        pcm_bytes = audio_data.get_raw_data(convert_rate=16000, convert_width=2)
        # ``content`` is a bytes field, so the buffer is passed as-is; encoding
        # it to a base64 string first is unnecessary. The local is not called
        # ``audio`` to avoid shadowing the module-level PyAudio object.
        recognition_audio = speech.RecognitionAudio(content=pcm_bytes)
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=16000,
            language_code="en-US",
        )
        response = client.recognize(
            request={"config": config, "audio": recognition_audio}
        )
        print("Response:", response)  # Print response for debugging

        # Guard both levels: a result without alternatives would otherwise
        # raise IndexError.
        if not response.results or not response.results[0].alternatives:
            print("No speech recognized.")
            return

        command = response.results[0].alternatives[0].transcript.lower().strip()
        print("Command:", command)

        # "quit" is an ordinary entry in the dispatch table, so it needs no
        # special case.
        action = command_actions.get(command)
        if action is None:
            print("Command not recognized.")
        else:
            action()
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
    except Exception as e:
        # Thread boundary: errors from the Cloud client or from a command
        # handler are not ``sr`` exceptions, so report them here.
        print("Speech processing failed: {0}".format(e))
# Main loop
def listen_for_speech(poll_timeout=1.0):
    """Capture utterances from the default microphone until ``running`` is False.

    Each captured utterance is handed to ``process_audio`` on its own thread so
    listening continues while the previous utterance is being transcribed.

    Args:
        poll_timeout: Seconds to wait for speech to start before re-checking
            the ``running`` flag. With ``None`` the call blocks until someone
            speaks, so a stop request goes unnoticed during silence.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening for speech...")
        while running:
            try:
                audio_data = recognizer.listen(source, timeout=poll_timeout)
                print("Speech detected!")
                # The ``processing`` flag is intentionally left alone here.
                # Clearing it right after starting the worker raced with the
                # worker's own check and disabled every later command,
                # including "quit" and "stop".
                threading.Thread(target=process_audio, args=(audio_data,)).start()
            except sr.WaitTimeoutError:
                # Nobody spoke within the window; loop to re-check ``running``.
                continue
            except KeyboardInterrupt:
                # Python raises KeyboardInterrupt in the main thread only, so
                # this is reachable just when the function runs there.
                quit_process()
                break
# Function to start audio processing
def start_process():
    """Start the listening loop on a background thread.

    Returns:
        The started ``threading.Thread`` so a caller may ``join()`` it.
        Callers that ignore the return value are unaffected.
    """
    global running
    # Re-arm the loop flag: once stop_process() or quit_process() has cleared
    # it, a newly started listener would otherwise exit immediately.
    running = True
    # No keyboard hotkey is registered anywhere in this file; the voice
    # commands "quit" and "stop" are what the command table maps to shutdown.
    print("Transcription started... Say 'quit' or 'stop' to end.")
    listener = threading.Thread(target=listen_for_speech)
    listener.start()
    return listener
# Function to stop audio processing
def stop_process():
    """Report that transcription has stopped and clear the ``running`` flag.

    The listening loop tests ``running`` on every iteration, so clearing it
    asks that loop to exit.
    """
    global running
    print("Transcription stopped.")
    running = False
# Start the process only when this file is executed as a script, so importing
# the module does not open the microphone as a side effect.
if __name__ == "__main__":
    start_process()