-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTsneSolver.py
127 lines (107 loc) · 4.54 KB
/
TsneSolver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from TSNEImplementation import *
class TsneSolver:
def __init__(self, p=30.0, m_i=500, n_d=2, i_d=50):
""" Конструктор класса TsneSolver
Устанавливает параметры t-SNE
:param p: перплексия
:param m_i: количество итераций
:param n_d: новая размерность
:param i_d: исходная размерность
"""
self.perplexity = p
self.max_iteration = m_i
self.no_dims = n_d
self.initial_dims = i_d
self.tsne_error = 0
def set_params(self, p=30.0, m_i=500, n_d=2, i_d=50):
""" Метод для установки параметров t-SNE
:param p: перплексия
:param m_i: количество итераций
:param n_d: новая размерность
:param i_d: исходная размерность
"""
self.perplexity = p
self.max_iteration = m_i
self.no_dims = n_d
self.initial_dims = i_d
def reduce_dim(self, data): # TODO: change to NumPy from begin
""" Преобразование GlobalData в NumPy.ndarray -> запуск self.tsne
:param data: объект GlobalData(Data)
:return: NumPy.ndarray(<строки в файле>*<кол-во итераций>) TODO: change
"""
a = []
# list(GlobalData) == rows in file
rows_to_clusterize = list(data)
for i in range(len(rows_to_clusterize)):
for j in range(len(rows_to_clusterize.__getitem__(i))):
a.append(rows_to_clusterize.__getitem__(i).__getitem__(j))
data_numpy_array = np.array(a)
data_numpy_array = np.reshape(data_numpy_array, (-1, len(rows_to_clusterize.__getitem__(0))))
result = self.tsne(data_numpy_array)
return result
def tsne(self, X=np.array([])):
""" Here is magic works # TODO: i will stop it
Runs t-SNE on the dataset in the NxD array X to reduce its
dimensionality to no_dims dimensions. The syntaxis of the function is
`Y = tsne.tsne(X, no_dims, perplexity), where X is an NxD NumPy array.
"""
# Check inputs
if isinstance(self.no_dims, float):
print("Error: array X should have type float.")
return -1
if round(self.no_dims) != self.no_dims:
print("Error: number of dimensions should be an integer.")
return -1
# Initialize variables
X = TSNEWindow.pca(X, self.initial_dims).real
(n, d) = X.shape
max_iter = self.max_iteration
initial_momentum = 0.5
final_momentum = 0.8
eta = 500
min_gain = 0.01
Y = np.random.randn(n, self.no_dims)
dY = np.zeros((n, self.no_dims))
iY = np.zeros((n, self.no_dims))
gains = np.ones((n, self.no_dims))
# Compute P-values
P = TSNEWindow.x2p(X, 1e-5, self.perplexity)
P = P + np.transpose(P)
P = P / np.sum(P)
P = P * 4. # early exaggeration
P = np.maximum(P, 1e-12)
# Подкотовка изображений
# Run iterations
for iter in range(max_iter):
# Compute pairwise affinities
sum_Y = np.sum(np.square(Y), 1)
num = -2. * np.dot(Y, Y.T)
num = 1. / (1. + np.add(np.add(num, sum_Y).T, sum_Y))
num[range(n), range(n)] = 0.
Q = num / np.sum(num)
Q = np.maximum(Q, 1e-12)
# Compute gradient
PQ = P - Q
for i in range(n):
dY[i, :] = np.sum(np.tile(PQ[:, i] * num[:, i], (self.no_dims, 1)).T * (Y[i, :] - Y), 0)
# Perform the update
if iter < 20:
momentum = initial_momentum
else:
momentum = final_momentum
gains = (gains + 0.2) * ((dY > 0.) != (iY > 0.)) + \
(gains * 0.8) * ((dY > 0.) == (iY > 0.))
gains[gains < min_gain] = min_gain
iY = momentum * iY - eta * (gains * dY)
Y = Y + iY
Y = Y - np.tile(np.mean(Y, 0), (n, 1))
# Compute current value of cost function
if (iter + 1) % 10 == 0:
C = np.sum(P * np.log(P / Q))
# print("Iteration %d: error is %f" % (iter + 1, C))
self.tsne_error = C # error saving
# Stop lying about P-values
if iter == 100:
P = P / 4.
# Return solution
return Y