-
Notifications
You must be signed in to change notification settings - Fork 4
/
audio.py
100 lines (77 loc) · 2.49 KB
/
audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from sys import byteorder
from array import array
import pyaudio
class AudioRecorder:
    """Record a single utterance from the default microphone via PyAudio.

    Samples are handled as 32-bit floats in ``array('f')`` containers.
    Recording begins with the first non-silent chunk and ends once enough
    silent chunks have been seen afterwards.
    """

    # Number of silent chunks (counted after speech has started) that ends a
    # recording. The count is cumulative, not consecutive, so long sentences
    # with many pauses may be cut short.
    MAX_SILENT_CHUNKS = 30

    def __init__(self, rate=16000, threshold=0.03, chunk_size=4096):
        """Create the PyAudio handle and store the recording parameters.

        Args:
            rate: Sampling rate in Hz.
            threshold: Peak magnitude below which audio counts as silence
                (needs tuning per microphone/environment).
            chunk_size: Number of frames read from the stream per iteration.
        """
        self.rate = rate
        self.threshold = threshold
        self.chunk_size = chunk_size
        self.format = pyaudio.paFloat32
        self._pyaudio = pyaudio.PyAudio()

    def isSilent(self, data):
        """Return True if the peak magnitude of ``data`` is below the threshold.

        Float samples are signed, so the magnitude (not the raw value) is
        compared; an empty chunk is treated as silent.
        """
        return max((abs(sample) for sample in data), default=0.0) < self.threshold

    def normalize(self, data):
        """Return a copy of ``data`` scaled so its peak magnitude is 0.5.

        Empty or all-zero input cannot be scaled and is returned unchanged
        (as a new ``array('f')``).
        """
        peak = max((abs(sample) for sample in data), default=0.0)
        if peak == 0:
            return array('f', data)
        scale = 0.5 / peak
        return array('f', (sample * scale for sample in data))

    def trim(self, data):
        """Return ``data`` without the quiet samples at its start and end.

        Everything before the first and after the last sample whose magnitude
        exceeds the threshold is dropped; samples in between are kept as-is.
        An empty array is returned when no sample exceeds the threshold.
        """
        threshold = self.threshold
        first = next(
            (i for i, sample in enumerate(data) if abs(sample) > threshold),
            None,
        )
        if first is None:
            return array('f')
        # A loud sample exists, so the backwards search always finds one.
        last = next(
            i for i in range(len(data) - 1, -1, -1) if abs(data[i]) > threshold
        )
        return array('f', data[first:last + 1])

    def addSilence(self, data):
        """Return ``data`` padded with 0.1 seconds of silence on both sides."""
        padding = array('f', [0.0]) * int(0.1 * self.rate)
        padded = array('f', padding)
        padded.extend(data)
        padded.extend(padding)
        return padded

    def record(self):
        """Record words from the microphone in paFloat32 format.

        Audio is captured as mono at ``self.rate`` Hz until more than
        ``MAX_SILENT_CHUNKS`` silent chunks have been read after the first
        non-silent chunk. The captured audio is then normalized, trimmed and
        padded with silence.

        Returns:
            A ``(samples, width)`` tuple: ``samples`` is an ``array('f')`` of
            signed floats and ``width`` is the sample size reported by
            PyAudio for the stream format.
        """
        # NOTE(review): output=True also opens a playback stream that is never
        # written to; kept for compatibility -- confirm whether it is needed.
        stream = self._pyaudio.open(
            format=self.format,
            channels=1,
            rate=self.rate,
            input=True,
            output=True,
            frames_per_buffer=self.chunk_size,
        )
        samples = array('f')
        silent_chunks = 0
        started = False
        try:
            while True:
                # The stream delivers native-endian floats and array('f')
                # decodes them natively, so no byte swapping is required.
                chunk = array('f', stream.read(self.chunk_size))
                samples.extend(chunk)

                silent = self.isSilent(chunk)
                if silent and started:
                    silent_chunks += 1
                elif not silent and not started:
                    started = True

                if started and silent_chunks > self.MAX_SILENT_CHUNKS:
                    break
        finally:
            # Always release the audio device, even if reading fails.
            stream.stop_stream()
            stream.close()

        width = self._pyaudio.get_sample_size(self.format)
        samples = self.normalize(samples)
        samples = self.trim(samples)
        samples = self.addSilence(samples)
        return samples, width

    def terminate(self):
        """Release the underlying PyAudio instance."""
        self._pyaudio.terminate()