-
Notifications
You must be signed in to change notification settings - Fork 3
/
diffussion.py
113 lines (97 loc) · 3.92 KB
/
diffussion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# Author: Dmytro Mishkin, 2018.
# This is simple partial re-implementation of papers
# Fast Spectral Ranking for Similarity Search, CVPR 2018. https://arxiv.org/abs/1703.06935
# and Efficient Diffusion on Region Manifolds: Recovering Small Objects with Compact CNN Representations, CVPR 2017 http://people.rennes.inria.fr/Ahmet.Iscen/diffusion.html
import time
import os
import numpy as np
from scipy.io import loadmat
from scipy.sparse import csr_matrix, eye, diags
from scipy.sparse import linalg as s_linalg
def sim_kernel(dot_product):
return np.maximum(np.power(dot_product,3),0)
def normalize_connection_graph(G):
start_time = time.time()
W = csr_matrix(G)
W = W - diags(W.diagonal())
D = np.array(1./ np.sqrt(W.sum(axis = 1)))
D[np.isnan(D)] = 0
D[np.isinf(D)] = 0
D_mh = diags(D.reshape(-1))
Wn = D_mh * W * D_mh
duration = time.time() - start_time
minute = int(duration / 60)
second = int(duration) % 60
print("%dminutes %dseconds" % (minute,second))
return Wn
def topK_W(G, K = 100):
sortidxs = np.argsort(-G, axis = 1)
for i in range(G.shape[0]):
G[i,sortidxs[i,K:]] = 0
G = np.minimum(G, G.T)
return G
def find_trunc_graph(qs, W, levels = 3):
needed_idxs = []
needed_idxs = list(np.nonzero(qs > 0)[0])
for l in range(levels):
idid = W.nonzero()[1]
needed_idxs.extend(list(idid))
needed_idxs =list(set(needed_idxs))
return np.array(needed_idxs), W[needed_idxs,:][:,needed_idxs]
def dfs_trunk(sim, A,alpha = 0.99, QUERYKNN = 10, maxiter = 8, K = 100, tol = 1e-3):
start_time = time.time()
qsim = sim_kernel(sim).T
sortidxs = np.argsort(-qsim, axis = 1)
for i in range(len(qsim)):
qsim[i,sortidxs[i,QUERYKNN:]] = 0
qsims = sim_kernel(qsim)
W = sim_kernel(A)
W = csr_matrix(topK_W(W, K))
out_ranks = []
t =time()
for i in range(qsims.shape[0]):
qs = qsims[i,:]
tt = time()
w_idxs, W_trunk = find_trunc_graph(qs, W, 2);
Wn = normalize_connection_graph(W_trunk)
Wnn = eye(Wn.shape[0]) - alpha * Wn
f,inf = s_linalg.minres(Wnn, qs[w_idxs], tol=tol, maxiter=maxiter)
ranks = w_idxs[np.argsort(-f.reshape(-1))]
missing = np.setdiff1d(np.arange(A.shape[1]), ranks)
out_ranks.append(np.concatenate([ranks.reshape(-1,1), missing.reshape(-1,1)], axis = 0))
out_ranks = np.concatenate(out_ranks, axis = 1)
duration = time.time() - start_time
minute = int(duration / 60)
second = int(duration) % 60
print("%dminutes %dseconds" % (minute,second))
return out_ranks
def cg_diffusion(qsims, Wn, alpha = 0.99, maxiter = 10, tol = 1e-3):
Wnn = eye(Wn.shape[0]) - alpha * Wn
out_sims = []
for i in range(qsims.shape[0]):
#f,inf = s_linalg.cg(Wnn, qsims[i,:], tol=tol, maxiter=maxiter)
f,inf = s_linalg.minres(Wnn, qsims[i,:], tol=tol, maxiter=maxiter)
out_sims.append(f.reshape(-1,1))
out_sims = np.concatenate(out_sims, axis = 1)
ranks = np.argsort(-out_sims, axis = 0)
return ranks
def fsr_rankR(qsims, Wn, alpha = 0.99, R = 2000):
start_time = time.time()
vals, vecs = s_linalg.eigsh(Wn, k = R)
p2 = diags((1.0 - alpha) / (1.0 - alpha*vals))
vc = csr_matrix(vecs)
p3 = vc.dot(p2)
##vc_norm = (vc.multiply(vc)).sum(axis = 0)
out_sims = []
for i in range(qsims.shape[0]):
qsims_sparse = csr_matrix(qsims[i:i+1,:])
p1 =(vc.T).dot(qsims_sparse.T)
diff_sim = csr_matrix(p3)*csr_matrix(p1)
out_sims.append(diff_sim.todense().reshape(-1,1))
out_sims = np.concatenate(out_sims, axis = 1)
ranks = np.argsort(-out_sims, axis = 0)
duration = time.time() - start_time
minute = int(duration / 60)
second = int(duration) % 60
print("%dminutes %dseconds" % (minute,second))
return ranks.T