-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathfull_evaluate.py
144 lines (120 loc) · 4.06 KB
/
full_evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
"""
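Run the full recognition pipeline, connecting both stages:
1) Voice activity detection to find the start and end of commands
2) Command detection to identify the command

Usage: full_evaluate.py <command-training-set.hdf5> <test-recording.hdf5>
(voicedetect-training.hdf5 is read from the current directory.)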
"""
import sys, os
import numpy
import h5py
import math
import scipy
from time import time
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
from sklearn.svm import SVC
import matplotlib.pyplot as plt
# ---------------------------------------------------------------------------
# Stage "command detection": fit PCA + RBF-kernel SVM on the labelled
# command images stored in the HDF5 file given as first command-line argument.
# Leaves `pca`, `clf` and `imgshape` at module level for detect_meaning().
# ---------------------------------------------------------------------------

# Seed the global NumPy RNG so repeated runs are comparable.
numpy.random.seed(1)

print('Command detection:')
print(' Loading command recognition training set...')
infile = sys.argv[1]
# Read everything into memory inside a context manager so the HDF5 handle is
# released as soon as loading finishes.  `dataset[()]` replaces the deprecated
# `dataset.value` accessor and returns the same in-memory array.
with h5py.File(infile, 'r') as f:
    labels = f['labels'][()]
    # The 'data' dataset is three-dimensional: (sample, image height, image width).
    nsamples, imgh, imgw = f['data'].shape
    imgshape = (imgh, imgw)
    # Flatten every image into one feature row per label.
    data = f['data'][()].reshape((len(labels), -1))
X_train = data

print(" Dimensionality reduction with PCA...")
n_components = 16
t0 = time()
pca = PCA(n_components=n_components, svd_solver='randomized',
          whiten=True).fit(X_train)
print(" Dimensionality reduction done (%0.3fs)" % (time() - t0))
X_train_pca = pca.transform(X_train)

print(" Training command classifier with SVM-RB ...")
C = 0.1
gamma = 0.05
t0 = time()
# probability=True is required because detect_meaning() calls predict_proba().
clf = SVC(C=C, kernel='rbf', gamma=gamma, probability=True)
clf = clf.fit(X_train_pca, labels)
# Alternative tried previously: RandomForestClassifier(n_estimators=100).
print(" Training command classifier done (%0.3fs)" % (time() - t0))
def detect_meaning(voicepart):
    """Classify one detected voice segment with the command classifier.

    The segment is peak-normalised, log-scaled, resized to the training image
    shape (module-level ``imgshape``), projected with ``pca`` and scored with
    ``clf.predict_proba``.

    Parameters
    ----------
    voicepart : 2-D numpy array
        The slice of the recording covering the segment (the caller passes
        ``data[istart:ioff, :]``).

    Returns
    -------
    img : numpy array
        The resized image that was fed to the classifier.
    label : str, class label, or tuple
        * ``'unsure'`` if the two most probable classes together hold less
          than 0.75 probability;
        * the most probable class label if it is more than twice as probable
          as the runner-up;
        * otherwise ``(best, '%.2f' % p_best, runner_up, '%.2f' % p_runner_up)``.
    """
    # NOTE(review): the previous code assigned ``voicepart[:, 8:]`` to ``img``
    # and overwrote it on the next statement, so that crop never took effect.
    # The full segment is still used here; confirm whether dropping the first
    # 8 columns was intended (it would have to match the training data).

    # Scale so the peak is 0.99, take the log (1e-10 keeps log() finite for
    # zero entries), shift by 255 and truncate to uint8.
    scaled = voicepart / voicepart.max() * 0.99 + 1e-10
    img = (numpy.log(scaled) + 255).astype('uint8')
    # Resize to the shape of the training images.
    # NOTE: scipy.misc.imresize was removed in SciPy 1.3; this call needs an
    # older SciPy (with Pillow).  It is left unchanged because a different
    # resampler could alter the features the classifier was trained on.
    img = scipy.misc.imresize(img, size=imgshape, mode='F')

    probs = clf.predict_proba(pca.transform(img.reshape(1, -1)))[0]
    print(' probabilities: %s' % (probs,))

    # Column indices of the runner-up and the most probable class.
    second, first = probs.argsort()[-2:]
    if probs[first] + probs[second] < 0.75:
        return img, 'unsure'

    # predict_proba columns follow clf.classes_, so map the column index back
    # to the actual class label instead of returning the bare index.
    best = clf.classes_[first]
    if probs[first] / probs[second] > 2:
        return img, best

    # Ambiguous between the top two: report both with their probabilities.
    runner_up = clf.classes_[second]
    label = (best, '%.2f' % probs[first], runner_up, '%.2f' % probs[second])
    return img, label
# ---------------------------------------------------------------------------
# Stage "voice activity detection": fit a random forest on the feature/label
# arrays stored in voicedetect-training.hdf5.  Leaves `vadclf` at module level.
# ---------------------------------------------------------------------------
print('Voice activity detection:')
print(' loading voice activity detector training set ...')
# Open explicitly read-only: without a mode, older h5py versions open the file
# read/write (and create it if missing).  `dataset[()]` replaces the
# deprecated `dataset.value` accessor.
with h5py.File('voicedetect-training.hdf5', 'r') as f:
    X = f['x7'][()]
    Y = f['y'][()]

print(' training voice activity detector ...')
t0 = time()
vadclf = RandomForestClassifier(n_estimators=40)
# Alternative tried previously: MLPClassifier(hidden_layer_sizes=(10,)).
vadclf = vadclf.fit(X, Y)
print(' training voice activity detector done (%.1fs)' % (time() - t0))
# ---------------------------------------------------------------------------
# Run both stages over the recording given as second command-line argument:
# slide over the frames, mark voice activity with `vadclf`, and hand every
# sufficiently long active segment to detect_meaning().
# ---------------------------------------------------------------------------
print('Running:')
filename = sys.argv[2]
print(' loading test data set... %s' % (filename,))
# Load everything into memory and close the file straight away.
# `dataset[()]` replaces the deprecated `dataset.value` accessor.
with h5py.File(filename, 'r') as f:
    labels = f['labels'][()]
    data = f['data'][()]
    print('  %s %s' % (data.shape, dict(f.attrs)))

look_back_seconds = 2
# files are 150000 samples long and cover ~20minutes, so it is
# approximately 140 samples per second
look_back = int(140 * look_back_seconds)
stride = 7
print('memory from the last %d frames' % look_back)

indices = []
istart = None   # first frame of the segment currently being tracked
ioff = None     # most recent frame classified as voice in that segment
j = 1           # counter used in the example.<j>.png file names
for i in range(look_back, len(data)):
    # Window of the last `look_back` frames plus the current one, first 64
    # columns only, thinned to every `stride`-th frame counting backwards
    # from frame i (the double reversal keeps frame i as the last row).
    dataset = data[i - look_back:i + 1, :64][::-1][::stride][::-1]
    # Log-scale with a floor of 1e-3 (which maps to 0) and divide by 30.
    dataset2 = (numpy.log(dataset + 1e-3) - numpy.log(1e-3)) / 30.
    yprob = vadclf.predict_proba(dataset2.flatten().reshape((1, -1)))
    # Column 1 of predict_proba is taken as the "voice active" probability.
    y = yprob[0, 1] > 0.625

    if y:
        if istart is None:
            # starting segment
            istart = i
        # continuing segment
        ioff = i
    elif istart is not None and ioff is not None and i > ioff + 3:
        # More than 3 inactive frames since the last active one: the segment
        # is over.
        if ioff > istart + 10:
            # emit segment (only if it spans more than 10 frames)
            img, label = detect_meaning(data[istart:ioff, :])
            print(' found some activity: %s %s %s' % (istart, ioff, label))
            plt.title(label)
            plt.imshow(img, cmap='RdBu')
            plt.savefig('example.%d.png' % j, bbox_inches='tight')
            plt.close()
        # NOTE(review): nesting reconstructed -- the state reset and counter
        # increment are applied whenever a segment closes, so segments that
        # are too short are discarded rather than merged with later activity.
        # Confirm against the intended behaviour.
        ioff = None
        istart = None
        j += 1