DNGO.py
import numpy as np
import torch
from torch.autograd import Variable
from SimpleNeuralNet import Net
from scipy import optimize

class DG:
    def __init__(self, num_epochs, learning_rate, H, D,
                 alpha=1.0, beta=1000):
        """
        A PyTorch implementation of Deep Networks for Global Optimization (DNGO) [1].
        This module performs Bayesian linear regression with basis functions extracted
        from a neural network (see the usage sketch at the bottom of this file).

        [1] J. Snoek, O. Rippel, K. Swersky, R. Kiros, N. Satish,
            N. Sundaram, M. M. A. Patwary, Prabhat, R. P. Adams
            Scalable Bayesian Optimization Using Deep Neural Networks
            Proc. of ICML'15

        Parameters
        ----------
        num_epochs: int
            Number of epochs used to train the neural network.
        learning_rate: float
            Initial learning rate of the Adam optimizer.
        H: int
            Number of units in the hidden layers of the network.
        D: int
            Size of the last hidden layer, i.e. the number of basis functions
            used for the Bayesian linear regression.
        alpha: float
            Precision of the prior on the regression weights.
        beta: float
            Precision of the observation noise.
        """
        self.X = None
        self.Y = None
        self.network = None
        self.alpha = alpha
        self.beta = beta
        self.init_learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.H = H  # number of units in the hidden layers
        self.D = D  # size of the last hidden layer (number of basis functions)

    def train(self, X, Y):
        """
        Trains the model on the provided data: the neural network is fitted to
        (X, Y), then the hyperparameters of the Bayesian linear regression on
        the extracted basis functions are optimized. The model can be retrained
        whenever the training set is enriched with new points.

        Parameters
        ----------
        X: np.ndarray (N, D)
            Input data points, with N the number of points and D the number of
            features. self.X holds the normalized inputs as a torch float tensor.
        Y: np.ndarray (N,)
            The corresponding target values, one per input point.
            self.Y holds the normalized targets as a torch float tensor.
        """
        # Normalize inputs and targets to zero mean and unit variance
        (normX, normY) = self.normalize(X, Y)
        self.X = Variable(torch.from_numpy(normX).float())
        self.Y = Variable(torch.from_numpy(normY).float(), requires_grad=False)
        features = X.shape[1]
        self.network = Net(features, self.H, self.D, 1)  # here we assume that D_out = 1
        loss_fn = torch.nn.MSELoss()
        optimizer = torch.optim.Adam(self.network.parameters(), lr=self.init_learning_rate)
        for t in range(self.num_epochs):
            y_pred = self.network(self.X)
            loss = loss_fn(y_pred.view(-1), self.Y)  # flatten the (N, 1) output to match self.Y
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Basis functions: activations of the last hidden layer
        self.phi = self.network.PHI(self.X).data
        # Optimize the hyperparameters in log space so that they stay positive
        res = optimize.fmin(self.marginal_log_likelihood, np.random.rand(2))
        self.hypers = [np.exp(res[0]), np.exp(res[1])]
        return self.hypers

    def marginal_log_likelihood(self, theta):
        """
        Negative marginal log likelihood of the Bayesian linear regression as a
        function of theta = [log(alpha), log(beta)].
        """
        # Reject values outside the search bounds with a large penalty,
        # since scipy's fmin minimizes this function.
        if np.any((-5 > theta) + (theta > 10)):
            return 1e25
        # theta lives in log space so that the precisions stay positive
        # during the unconstrained optimization.
        alpha = float(np.exp(theta[0]))
        beta = float(np.exp(theta[1]))
        Ydata = self.Y.data  # for the Bayesian part, Y no longer needs to be a Variable
        D = self.phi.size()[1]  # number of basis functions
        N = self.phi.size()[0]  # number of data points
        Identity = torch.eye(self.phi.size()[1])
        self.phi_T = torch.transpose(self.phi, 0, 1)
        # Posterior precision K = alpha * I + beta * Phi^T Phi
        self.K = alpha * Identity + beta * torch.mm(self.phi_T, self.phi)
        self.K_inverse = torch.inverse(self.K)
        # Posterior mean of the weights: m = beta * K^-1 Phi^T y
        m = beta * torch.mm(self.K_inverse, self.phi_T)
        self.m = torch.mv(m, Ydata)
        mll = (D / 2.) * np.log(alpha)
        mll += (N / 2.) * np.log(beta)
        mll -= (N / 2.) * np.log(2 * np.pi)
        mll -= (beta / 2.) * float(torch.norm(Ydata - torch.mv(self.phi, self.m), 2)) ** 2
        mll -= (alpha / 2.) * float(torch.dot(self.m, self.m))
        Knumpy = self.K.numpy()  # convert K to numpy for the log-determinant
        mll -= 0.5 * np.linalg.slogdet(Knumpy)[1]
        return -mll

    def predict(self, xtest):
        """
        Predictive mean and variance at the test points xtest, given on the
        original (un-normalized) scale as a torch float Variable of shape
        (M, number of features).
        """
        mx = Variable(torch.from_numpy(np.array(self._mx)).float())
        sx = Variable(torch.from_numpy(np.array(self._sx)).float())
        xtest = (xtest - mx) / sx
        phi_test = self.network.PHI(xtest).data
        phi_T = torch.transpose(phi_test, 0, 1)
        # Recompute m and K_inverse for the optimized hyperparameters;
        # marginal_log_likelihood expects log-space values, hence the log.
        self.marginal_log_likelihood(np.log(np.array(self.hypers)))
        mean = np.dot(phi_test.numpy(), self.m.numpy())
        mean = mean * self._sy + self._my  # map back to the original output scale
        # Predictive variance: phi^T K^-1 phi plus the noise variance 1 / beta
        var = np.diag(np.dot(phi_test.numpy(), np.dot(self.K_inverse.numpy(), phi_T.numpy()))) + (1. / self.hypers[1])
        var *= (self._sy ** 2)
        return mean, var

    def normalize(self, x, y):
        # Normalize inputs (per feature) and targets to zero mean and unit variance
        mx = np.mean(x, axis=0)
        sx = np.std(x, axis=0, ddof=1)
        my = np.mean(y)
        sy = np.std(y, ddof=1)
        self._mx = mx
        self._sx = sx
        self._my = my
        self._sy = sy
        x_norm = (x - mx) / sx
        y_norm = (y - my) / sy
        return x_norm, y_norm
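

# A minimal usage sketch, not part of the original module. It assumes that
# SimpleNeuralNet.Net follows the signature Net(D_in, H, D, D_out) implied by
# the call in train(), and that predict() receives the raw (un-normalized)
# test points as a torch float Variable. The toy objective and all sizes
# below are illustrative only.
if __name__ == "__main__":
    rng = np.random.RandomState(0)

    # Toy 1-D regression problem
    X = rng.uniform(-3., 3., size=(50, 1))
    Y = np.sinc(X[:, 0]) + 0.05 * rng.randn(50)

    # Small network: 50 hidden units, 10 basis functions, 500 training epochs
    model = DG(num_epochs=500, learning_rate=0.01, H=50, D=10)
    model.train(X, Y)

    # Predict on a grid of test points
    x_test = np.linspace(-3., 3., 20).reshape(-1, 1)
    mean, var = model.predict(Variable(torch.from_numpy(x_test).float()))
    print("predictive mean:", mean)
    print("predictive variance:", var)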