forked from ml-lab/DeepVideoAnalytics
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlopq_tests.py
80 lines (71 loc) · 2.96 KB
/
lopq_tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import sys,os
sys.path.append(os.path.dirname(__file__))
from dvalib import external_indexed
try:
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn.decomposition import PCA
from lopq import LOPQModel, LOPQSearcher
from lopq.eval import compute_all_neighbors, get_recall
from lopq.model import eigenvalue_allocation
except:
pass
def load_oxford_data():
    """Load the Oxford feature vectors shipped with the bundled lopq checkout.

    The path is relative, so this only works when the process is started
    from the directory that contains ``repos/``.

    :return: whatever ``lopq.utils.load_xvecs`` returns for the ``.fvecs``
             file (presumably a 2-D array of feature vectors -- confirm
             against the lopq documentation).
    """
    # Imported lazily so the module can still be imported when lopq is absent.
    from lopq.utils import load_xvecs

    return load_xvecs('repos/lopq/data/oxford/oxford_features.fvecs')
def pca(data):
    """
    A simple PCA implementation that demonstrates how eigenvalue allocation
    is used to permute dimensions in order to balance the variance across
    subvectors. There are plenty of PCA implementations elsewhere. What is
    important is that the eigenvalues can be used to compute a variance-balancing
    dimension permutation.

    :param data: 2-D array whose ``shape`` unpacks as ``(count, D)``; each row
                 is treated as one sample.
    :return: tuple ``(P, mu)`` where ``P`` holds the eigenvectors of the
             covariance estimate as columns, reordered by
             ``eigenvalue_allocation``, and ``mu`` is the per-column mean of
             ``data``.
    """
    count, D = data.shape
    mu = data.sum(axis=0) / float(count)

    # The sum over rows of outer(x, x) is exactly X^T X. Computing it as one
    # matrix product avoids a Python-level loop over every sample (and the
    # `reduce` builtin, which does not exist on Python 3). The cast keeps the
    # accumulation in float64, as the original float64 accumulator did.
    data64 = np.asarray(data, dtype=np.float64)
    summed_covar = np.dot(data64.T, data64)

    # NOTE(review): this divides the scatter matrix by (count - 1) but
    # subtracts an unscaled outer(mu, mu), so it is not exactly the unbiased
    # sample covariance. Kept unchanged so results match the original.
    A = summed_covar / (count - 1) - np.outer(mu, mu)

    # eigh: A is symmetric by construction.
    eigenvalues, P = np.linalg.eigh(A)

    # First argument is presumably the number of buckets to balance the
    # eigenvalues across -- confirm against lopq.model.eigenvalue_allocation.
    permuted_inds = eigenvalue_allocation(2, eigenvalues)
    P = P[:, permuted_inds]
    return P, mu
def _fit_and_report_recall(model, train, test, nns, verbose=False):
    """Fit *model* on *train*, index *train*, and print the recall on *test*.

    :param model: an unfitted (or partially parameterised) ``LOPQModel``.
    :param train: vectors used both to fit the model and to fill the searcher.
    :param test: query vectors passed to ``get_recall``.
    :param nns: value returned by ``compute_all_neighbors(test, train)``.
    :param verbose: when true, also print the "fitted" / "adding data"
                    progress lines between the steps.
    """
    model.fit(train, n_init=1)
    if verbose:
        print("fitted")
    searcher = LOPQSearcher(model)
    if verbose:
        print("adding data")
    searcher.add_data(train)
    recall, _ = get_recall(searcher, test, nns)
    summary = (model.V, model.M, model.subquantizer_clusters, str(recall))
    print('Recall (V=%d, M=%d, subquants=%d): %s' % summary)


def main(input_dir='/Users/aub3/temptest/gtin/',output_dir="/Users/aub3/temptest/products"):
    """Run an LOPQ recall experiment on the vectors of a ``ProductsIndex``.

    Steps: build the approximate index at *output_dir*, reduce its ``data``
    to 32 dimensions with scikit-learn's PCA, rotate it with the local
    ``pca`` helper, split 80/20 into train/test, then fit three LOPQ
    configurations and print the recall of each.

    :param input_dir: only referenced by the disabled ``prepare`` call below.
    :param output_dir: path handed to ``external_indexed.ProductsIndex``.
    """
    index = external_indexed.ProductsIndex(path=output_dir)
    # index.prepare(input_dir)   # disabled in the original; kept for reference
    index.build_approximate()
    features = index.data
    # features = load_oxford_data()   # alternative data source, disabled
    print(features.shape)

    # Dimensionality reduction to 32 components.
    reducer = PCA(n_components=32)
    reducer.fit(features)
    features = reducer.transform(features)
    print(features.shape)

    # Centre the data and apply the permuted eigenvector basis from pca().
    rotation, mean = pca(features)
    features = np.dot(features - mean, rotation)

    train, test = train_test_split(features, test_size=0.2)
    print("%s %s" % (train.shape, test.shape))

    # Neighbours of the test vectors within train; handed to get_recall for
    # every configuration (presumably the exact ground truth -- confirm).
    nns = compute_all_neighbors(test, train)

    # Configuration 1: everything is fitted from scratch.
    base = LOPQModel(V=16, M=8)
    _fit_and_report_recall(base, train, test, nns, verbose=True)

    # Configuration 2: M=16, reusing only the first model's Cs.
    finer = LOPQModel(V=16, M=16, parameters=(base.Cs, None, None, None))
    _fit_and_report_recall(finer, train, test, nns)

    # Configuration 3: 512 subquantizer clusters, reusing Cs, Rs and mus.
    larger = LOPQModel(V=16, M=8, subquantizer_clusters=512,
                       parameters=(base.Cs, base.Rs, base.mus, None))
    _fit_and_report_recall(larger, train, test, nns)
# Run the experiment with its default paths when executed as a script.
if __name__ == "__main__":
    main()