import os
import typing
import numpy as np
import torch
import torch.optim
from matplotlib import pyplot as plt
from sklearn.metrics import roc_auc_score, average_precision_score
from torch import nn
from torch.nn import functional as F
from tqdm import trange
from util import ece, ParameterDistribution
"""
main model inspired by :
https://github.com/JavierAntoran/Bayesian-Neural-Networks/blob/master/src/Bayes_By_Backprop/model.py
"""
# Set `EXTENDED_EVALUATION` to `True` in order to visualize your predictions.
EXTENDED_EVALUATION = False
# Seed for random values
SEED = 42
# The prior over weights and biases can be either Laplace or UnivariateGaussian;
# change the distribution used in the BayesianLayer class.
PRIOR_SIGMA = 0.1  # scale of the prior over weights and biases
MU_UNIFORM = (-0.2, 0.2)  # initialization range for the variational posterior means
RHO_UNIFORM = (-5, -4)  # initialization range for rho, where sigma = softplus(rho)
NUM_EPOCHS = 4 # number of training epochs
NUM_SAMPLES = 50  # number of Monte Carlo weight samples per training step
BATCH_SIZE = 128 # training batch size
LEARNING_RATE = 0.0029  # training learning rate
HIDDEN_LAYERS = (100, 100) # for each entry, creates a hidden layer with the corresponding number of units
def run_solution(dataset_train: torch.utils.data.Dataset, data_dir: str = os.curdir, output_dir: str = '/results/') -> 'Model':
"""
Run your task 2 solution.
This method should train your model, evaluate it, and return the trained model at the end.
Make sure to preserve the method signature and to return your trained model,
else the checker will fail!
:param dataset_train: Training dataset
    :param data_dir: Directory containing the datasets
    :param output_dir: Directory into which evaluation plots are saved
    :return: Your trained model
"""
    # Setting a manual seed for reproducible results
torch.manual_seed(0)
# Create model
model = Model()
# Train the model
print('Training model')
model.train(dataset_train)
# Predict using the trained model
print('Evaluating model on training data')
eval_loader = torch.utils.data.DataLoader(
dataset_train, batch_size=64, shuffle=False, drop_last=False
)
evaluate(model, eval_loader, data_dir, output_dir)
# IMPORTANT: return your model here!
return model
class Model(object):
"""
Task 2 model that can be used to train a BNN using Bayes by backprop and create predictions.
You need to implement all methods of this class without changing their signature,
else the checker will fail!
"""
def __init__(self):
# Hyperparameters and general parameters
# You might want to play around with those
self.num_epochs = NUM_EPOCHS # number of training epochs
self.num_samples = NUM_SAMPLES
self.batch_size = BATCH_SIZE # training batch size
        learning_rate = LEARNING_RATE  # training learning rate
hidden_layers = HIDDEN_LAYERS # for each entry, creates a hidden layer with the corresponding number of units
use_densenet = False # set this to True in order to run a DenseNet for comparison
self.print_interval = 100 # number of batches until updated metrics are displayed during training
# Determine network type
if use_densenet:
# DenseNet
print('Using a DenseNet model for comparison')
self.network = DenseNet(in_features=28 * 28, hidden_features=hidden_layers, out_features=10)
else:
# BayesNet
print('Using a BayesNet model')
self.network = BayesNet(in_features=28 * 28, hidden_features=hidden_layers, out_features=10)
# Optimizer for training
# Feel free to try out different optimizers
self.optimizer = torch.optim.Adam(self.network.parameters(), lr=learning_rate)
def train(self, dataset: torch.utils.data.Dataset):
"""
Train your neural network.
If the network is a DenseNet, this performs normal stochastic gradient descent training.
If the network is a BayesNet, this should perform Bayes by backprop.
:param dataset: Dataset you should use for training
"""
train_loader = torch.utils.data.DataLoader(
dataset, batch_size=self.batch_size, shuffle=True, drop_last=True
)
self.network.train()
progress_bar = trange(self.num_epochs)
for _ in progress_bar:
num_batches = len(train_loader)
for batch_idx, (batch_x, batch_y) in enumerate(train_loader):
# batch_x are of shape (batch_size, 784), batch_y are of shape (batch_size,)
self.network.zero_grad()
if isinstance(self.network, DenseNet):
# DenseNet training step
# Perform forward pass
current_logits = self.network(batch_x)
# Calculate the loss
# We use the negative log likelihood as the loss
# Combining nll_loss with a log_softmax is better for numeric stability
loss = F.nll_loss(F.log_softmax(current_logits, dim=1), batch_y, reduction='sum')
# Backpropagate to get the gradients
loss.backward()
else:
# BayesNet training step via Bayes by backprop
assert isinstance(self.network, BayesNet)
                    # Initialize the loss at zero; it is accumulated over the Monte Carlo samples below
loss = torch.tensor(0.0)
# Calculate the loss
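                    # The accumulated quantity below is a Monte Carlo estimate of the minibatch ELBO used in
                    # Bayes by backprop (Blundell et al., "Weight Uncertainty in Neural Networks"): an average
                    # over num_samples weight draws w ~ q(w | theta) of
                    #   (log q(w | theta) - log p(w)) / num_batches + NLL(batch | w),
                    # i.e. the complexity (KL) term is spread evenly over the num_batches minibatches of one epoch.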
for i in range(self.num_samples):
current_logits, log_prior, log_variational_posterior = self.network(batch_x)
current_loss = F.nll_loss(F.log_softmax(current_logits, dim=1), batch_y, reduction='sum')
log_difference = (log_variational_posterior - log_prior) / num_batches
loss += (log_difference + current_loss) / self.num_samples
# Backpropagate to get the gradients
loss.backward(retain_graph=True)
self.optimizer.step()
# Update progress bar with accuracy occasionally
if batch_idx % self.print_interval == 0:
if isinstance(self.network, DenseNet):
current_logits = self.network(batch_x)
else:
assert isinstance(self.network, BayesNet)
current_logits, _, _ = self.network(batch_x)
current_accuracy = (current_logits.argmax(axis=1) == batch_y).float().mean()
progress_bar.set_postfix(loss=loss.item(), acc=current_accuracy.item())
def predict(self, data_loader: torch.utils.data.DataLoader) -> np.ndarray:
"""
Predict the class probabilities using your trained model.
        This method should return a (num_samples, 10) NumPy float array
such that the second dimension sums up to 1 for each row.
:param data_loader: Data loader yielding the samples to predict on
:return: (num_samples, 10) NumPy float array where the second dimension sums up to 1 for each row
"""
self.network.eval()
probability_batches = []
for batch_x, batch_y in data_loader:
current_probabilities = self.network.predict_probabilities(batch_x).detach().numpy()
probability_batches.append(current_probabilities)
output = np.concatenate(probability_batches, axis=0)
assert isinstance(output, np.ndarray)
assert output.ndim == 2 and output.shape[1] == 10
assert np.allclose(np.sum(output, axis=1), 1.0)
return output
class BayesianLayer(nn.Module):
"""
Module implementing a single Bayesian feedforward layer.
It maintains a prior and variational posterior for the weights (and biases)
and uses sampling to approximate the gradients via Bayes by backprop.
"""
def __init__(self, in_features: int, out_features: int, bias: bool = True):
"""
Create a BayesianLayer.
:param in_features: Number of input features
:param out_features: Number of output features
:param bias: If true, use a bias term (i.e., affine instead of linear transformation)
"""
super().__init__()
self.in_features = in_features
self.out_features = out_features
self.use_bias = bias
# Prior for weights and biases as an instance of ParameterDistribution
self.prior = Laplace(torch.zeros((out_features, in_features)), torch.full((out_features, in_features), PRIOR_SIGMA))
assert isinstance(self.prior, ParameterDistribution)
assert not any(True for _ in self.prior.parameters()), 'Prior cannot have parameters'
# Variational posterior for weights as an instance of ParameterDistribution
self.weights_var_posterior = MultivariateDiagonalGaussian(
nn.Parameter(torch.Tensor(out_features, in_features).uniform_(MU_UNIFORM[0], MU_UNIFORM[1])),
nn.Parameter(torch.Tensor(out_features, in_features).uniform_(RHO_UNIFORM[0], RHO_UNIFORM[1]))
)
assert isinstance(self.weights_var_posterior, ParameterDistribution)
assert any(True for _ in self.weights_var_posterior.parameters()), 'Weight posterior must have parameters'
if self.use_bias:
# Variational posterior for biases as an instance of ParameterDistribution
self.bias_var_posterior = MultivariateDiagonalGaussian(
nn.Parameter(torch.Tensor(out_features, 1).uniform_(MU_UNIFORM[0], MU_UNIFORM[1])),
nn.Parameter(torch.Tensor(out_features, 1).uniform_(RHO_UNIFORM[0], RHO_UNIFORM[1]))
)
assert isinstance(self.bias_var_posterior, ParameterDistribution)
assert any(True for _ in self.bias_var_posterior.parameters()), 'Bias posterior must have parameters'
else:
self.bias_var_posterior = None
def forward(self, inputs: torch.Tensor):
"""
Perform one forward pass through this layer.
If you need to sample weights from the variational posterior, you can do it here during the forward pass.
Just make sure that you use the same weights to approximate all quantities
present in a single Bayes by backprop sampling step.
:param inputs: Flattened input images as a (batch_size, in_features) float tensor
:return: 3-tuple containing
i) transformed features using stochastic weights from the variational posterior,
ii) sample of the log-prior probability, and
iii) sample of the log-variational-posterior probability
"""
bias = None
weights = self.weights_var_posterior.sample()
log_prior = self.prior.log_likelihood(weights)
log_variational_posterior = self.weights_var_posterior.log_likelihood(weights)
        # If `self.use_bias` is true, include the bias as well
        if self.use_bias:
            bias = self.bias_var_posterior.sample()
            # Evaluate the bias log-prior under a Laplace prior of matching shape
            # (rather than broadcasting the bias against the weight-shaped prior)
            bias_prior = Laplace(torch.zeros_like(bias), torch.full_like(bias, PRIOR_SIGMA))
            log_prior += bias_prior.log_likelihood(bias)
            log_variational_posterior += self.bias_var_posterior.log_likelihood(bias)
            bias = torch.squeeze(bias)
return F.linear(inputs, weights, bias), log_prior, log_variational_posterior
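
# Minimal usage sketch (hypothetical helper, not called by the checker or the training code):
# one stochastic forward pass through a single BayesianLayer on dummy data, showing the three
# quantities a Bayes by backprop step needs.
def _bayesian_layer_example() -> None:
    layer = BayesianLayer(in_features=784, out_features=10, bias=True)
    dummy_batch = torch.randn(4, 784)  # four flattened 28x28 "images"
    logits, log_prior, log_variational_posterior = layer(dummy_batch)
    print(logits.shape)  # torch.Size([4, 10])
    print(log_prior.item(), log_variational_posterior.item())  # scalar log-densities of the sampled weights
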
class BayesNet(nn.Module):
"""
Module implementing a Bayesian feedforward neural network using BayesianLayer objects.
"""
def __init__(self, in_features: int, hidden_features: typing.Tuple[int, ...], out_features: int):
"""
Create a BNN.
:param in_features: Number of input features
:param hidden_features: Tuple where each entry corresponds to a (Bayesian) hidden layer with
the corresponding number of features.
:param out_features: Number of output features
"""
super().__init__()
feature_sizes = (in_features,) + hidden_features + (out_features,)
num_affine_maps = len(feature_sizes) - 1
self.layers = nn.ModuleList([
BayesianLayer(feature_sizes[idx], feature_sizes[idx + 1], bias=True)
for idx in range(num_affine_maps)
])
self.activation = nn.ReLU()
def forward(self, x: torch.Tensor) -> typing.Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
"""
Perform one forward pass through the BNN using a single set of weights
sampled from the variational posterior.
:param x: Input features, float tensor of shape (batch_size, in_features)
:return: 3-tuple containing
i) output features using stochastic weights from the variational posterior,
ii) sample of the log-prior probability, and
iii) sample of the log-variational-posterior probability
"""
current_features = x
        # Accumulators for the total log-prior and log-variational-posterior across all layers
        log_prior = torch.tensor(0.0)
        log_variational_posterior = torch.tensor(0.0)
# Perform a forward pass for each layer and update the log values and features
for idx, current_layer in enumerate(self.layers):
new_features, lp, lvp = current_layer(current_features)
if idx < len(self.layers) - 1:
new_features = self.activation(new_features)
log_prior += lp
log_variational_posterior += lvp
current_features = new_features
output_features = current_features
return output_features, log_prior, log_variational_posterior
def predict_probabilities(self, x: torch.Tensor, num_mc_samples: int = 10) -> torch.Tensor:
"""
Predict class probabilities for the given features by sampling from this BNN.
:param x: Features to predict on, float tensor of shape (batch_size, in_features)
:param num_mc_samples: Number of MC samples to take for prediction
:return: Predicted class probabilities, float tensor of shape (batch_size, 10)
such that the last dimension sums up to 1 for each row
"""
probability_samples = torch.stack([F.softmax(self.forward(x)[0], dim=1) for _ in range(num_mc_samples)], dim=0)
estimated_probability = torch.mean(probability_samples, dim=0)
assert estimated_probability.shape == (x.shape[0], 10)
assert torch.allclose(torch.sum(estimated_probability, dim=1), torch.tensor(1.0))
return estimated_probability
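
# Minimal usage sketch (hypothetical helper, not called by the checker or the training code):
# the predictive class probabilities are a Monte Carlo average of softmax outputs over several
# stochastic forward passes, approximating the posterior predictive distribution.
def _bayesnet_prediction_example() -> None:
    net = BayesNet(in_features=784, hidden_features=(100, 100), out_features=10)
    dummy_batch = torch.randn(2, 784)
    probabilities = net.predict_probabilities(dummy_batch, num_mc_samples=20)
    print(probabilities.shape)  # torch.Size([2, 10]); each row sums to 1
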
class Laplace(ParameterDistribution):
    """
    Laplace distribution with location mu and scale sigma.
    For multivariate data, this assumes all elements to be i.i.d.
    """
    def __init__(self, mu: torch.Tensor, sigma: torch.Tensor):
        super(Laplace, self).__init__()  # always make sure to include the super-class init call!
        assert mu.size() == sigma.size()
        self.mu = mu
        self.sigma = sigma
def log_likelihood(self, values: torch.Tensor) -> torch.Tensor:
return torch.sum((-torch.log(2 * self.sigma) - torch.abs(values - self.mu) / self.sigma))
    def sample(self) -> torch.Tensor:
        # Draw an elementwise Laplace sample; not used during training, where only the
        # variational posteriors are sampled
        return torch.distributions.Laplace(self.mu, self.sigma).sample()
class UnivariateGaussian(ParameterDistribution):
"""
Univariate Gaussian distribution.
For multivariate data, this assumes all elements to be i.i.d.
"""
def __init__(self, mu: torch.Tensor, sigma: torch.Tensor):
super(UnivariateGaussian, self).__init__() # always make sure to include the super-class init call!
assert mu.size() == sigma.size()
self.mu = mu
self.sigma = sigma
def log_likelihood(self, values: torch.Tensor) -> torch.Tensor:
cte_term = -(0.5) * np.log(2 * np.pi)
det_sig_term = -torch.log(self.sigma)
dist_term = -(0.5) * (((values - self.mu) / self.sigma) ** 2)
return torch.sum((cte_term + det_sig_term + dist_term))
    def sample(self) -> torch.Tensor:
        # Reparameterized sample: mu + sigma * eps with elementwise standard normal noise
        return self.mu + self.sigma * torch.randn_like(self.mu)
class MultivariateDiagonalGaussian(ParameterDistribution):
"""
Multivariate diagonal Gaussian distribution,
i.e., assumes all elements to be independent Gaussians
but with different means and standard deviations.
This parameterizes the standard deviation via a parameter rho as
sigma = softplus(rho).
"""
def __init__(self, mu: torch.Tensor, rho: torch.Tensor):
super(MultivariateDiagonalGaussian, self).__init__() # always make sure to include the super-class init call!
        assert mu.size() == rho.size()
        self.mu = mu
        self.rho = rho

    @property
    def sigma(self) -> torch.Tensor:
        # Recompute sigma = softplus(rho) on every access so it always tracks the current value of rho
        return F.softplus(self.rho)
    def log_likelihood(self, values: torch.Tensor) -> torch.Tensor:
        variance = torch.square(self.sigma)
        squared_diff = torch.square(values - self.mu)
        # Diagonal Gaussian log-density, summed over all elements:
        # -0.5 * log(2 * pi * sigma^2) - (x - mu)^2 / (2 * sigma^2)
        return torch.sum(-0.5 * torch.log(2 * np.pi * variance) - squared_diff / (2 * variance))
    def sample(self) -> torch.Tensor:
        # Reparameterization trick: mu + sigma * eps with eps ~ N(0, I) drawn elementwise,
        # so gradients flow back to mu and rho through the sampled weights
        return self.mu + self.sigma * torch.randn_like(self.mu)
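
# Sanity-check sketch (hypothetical helper, not called anywhere): with sigma = softplus(rho),
# the log-likelihood above should agree with an independent Normal(mu, sigma) density summed
# over all elements, which torch.distributions provides as a reference.
def _diagonal_gaussian_log_likelihood_check() -> None:
    mu = torch.randn(3, 5)
    rho = torch.randn(3, 5)
    distribution = MultivariateDiagonalGaussian(mu, rho)
    values = distribution.sample()
    reference = torch.distributions.Normal(mu, F.softplus(rho)).log_prob(values).sum()
    assert torch.allclose(distribution.log_likelihood(values), reference, atol=1e-5)
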
def evaluate(model: Model, eval_loader: torch.utils.data.DataLoader, data_dir: str, output_dir: str):
"""
Evaluate your model.
:param model: Trained model to evaluate
:param eval_loader: Data loader containing the training set for evaluation
:param data_dir: Data directory from which additional datasets are loaded
:param output_dir: Directory into which plots are saved
"""
# Predict class probabilities on test data
predicted_probabilities = model.predict(eval_loader)
# Calculate evaluation metrics
predicted_classes = np.argmax(predicted_probabilities, axis=1)
actual_classes = eval_loader.dataset.tensors[1].detach().numpy()
accuracy = np.mean((predicted_classes == actual_classes))
ece_score = ece(predicted_probabilities, actual_classes)
print(f'Accuracy: {accuracy.item():.3f}, ECE score: {ece_score:.3f}')
if EXTENDED_EVALUATION:
eval_samples = eval_loader.dataset.tensors[0].detach().numpy()
# Determine confidence per sample and sort accordingly
confidences = np.max(predicted_probabilities, axis=1)
sorted_confidence_indices = np.argsort(confidences)
# Plot samples your model is most confident about
print('Plotting most confident MNIST predictions')
most_confident_indices = sorted_confidence_indices[-10:]
fig, ax = plt.subplots(4, 5, figsize=(13, 11))
for row in range(0, 4, 2):
for col in range(5):
sample_idx = most_confident_indices[5 * row // 2 + col]
ax[row, col].imshow(np.reshape(eval_samples[sample_idx], (28, 28)), cmap='gray')
ax[row, col].set_axis_off()
ax[row + 1, col].set_title(f'predicted {predicted_classes[sample_idx]}, actual {actual_classes[sample_idx]}')
bar_colors = ['C0'] * 10
bar_colors[actual_classes[sample_idx]] = 'C1'
ax[row + 1, col].bar(
np.arange(10), predicted_probabilities[sample_idx], tick_label=np.arange(10), color=bar_colors
)
fig.suptitle('Most confident predictions', size=20)
fig.savefig(os.path.join(output_dir, 'mnist_most_confident.pdf'))
# Plot samples your model is least confident about
print('Plotting least confident MNIST predictions')
least_confident_indices = sorted_confidence_indices[:10]
fig, ax = plt.subplots(4, 5, figsize=(13, 11))
for row in range(0, 4, 2):
for col in range(5):
sample_idx = least_confident_indices[5 * row // 2 + col]
ax[row, col].imshow(np.reshape(eval_samples[sample_idx], (28, 28)), cmap='gray')
ax[row, col].set_axis_off()
ax[row + 1, col].set_title(f'predicted {predicted_classes[sample_idx]}, actual {actual_classes[sample_idx]}')
bar_colors = ['C0'] * 10
bar_colors[actual_classes[sample_idx]] = 'C1'
ax[row + 1, col].bar(
np.arange(10), predicted_probabilities[sample_idx], tick_label=np.arange(10), color=bar_colors
)
fig.suptitle('Least confident predictions', size=20)
fig.savefig(os.path.join(output_dir, 'mnist_least_confident.pdf'))
print('Plotting ambiguous and rotated MNIST confidences')
ambiguous_samples = torch.from_numpy(np.load(os.path.join(data_dir, 'test_x.npz'))['test_x']).reshape([-1, 784])[:10]
ambiguous_dataset = torch.utils.data.TensorDataset(ambiguous_samples, torch.zeros(10))
ambiguous_loader = torch.utils.data.DataLoader(
ambiguous_dataset, batch_size=10, shuffle=False, drop_last=False
)
ambiguous_predicted_probabilities = model.predict(ambiguous_loader)
ambiguous_predicted_classes = np.argmax(ambiguous_predicted_probabilities, axis=1)
fig, ax = plt.subplots(4, 5, figsize=(13, 11))
for row in range(0, 4, 2):
for col in range(5):
sample_idx = 5 * row // 2 + col
ax[row, col].imshow(np.reshape(ambiguous_samples[sample_idx], (28, 28)), cmap='gray')
ax[row, col].set_axis_off()
ax[row + 1, col].set_title(f'predicted {ambiguous_predicted_classes[sample_idx]}')
ax[row + 1, col].bar(
np.arange(10), ambiguous_predicted_probabilities[sample_idx], tick_label=np.arange(10)
)
fig.suptitle('Predictions on ambiguous and rotated MNIST', size=20)
fig.savefig(os.path.join(output_dir, 'ambiguous_rotated_mnist.pdf'))
# Do the same evaluation as on MNIST also on FashionMNIST
print('Predicting on FashionMNIST data')
fmnist_samples = torch.from_numpy(np.load(os.path.join(data_dir, 'fmnist.npz'))['x_test']).reshape([-1, 784])
fmnist_dataset = torch.utils.data.TensorDataset(fmnist_samples, torch.zeros(fmnist_samples.shape[0]))
fmnist_loader = torch.utils.data.DataLoader(
fmnist_dataset, batch_size=64, shuffle=False, drop_last=False
)
fmnist_predicted_probabilities = model.predict(fmnist_loader)
fmnist_predicted_classes = np.argmax(fmnist_predicted_probabilities, axis=1)
fmnist_confidences = np.max(fmnist_predicted_probabilities, axis=1)
fmnist_sorted_confidence_indices = np.argsort(fmnist_confidences)
# Plot FashionMNIST samples your model is most confident about
print('Plotting most confident FashionMNIST predictions')
most_confident_indices = fmnist_sorted_confidence_indices[-10:]
fig, ax = plt.subplots(4, 5, figsize=(13, 11))
for row in range(0, 4, 2):
for col in range(5):
sample_idx = most_confident_indices[5 * row // 2 + col]
ax[row, col].imshow(np.reshape(fmnist_samples[sample_idx], (28, 28)), cmap='gray')
ax[row, col].set_axis_off()
ax[row + 1, col].set_title(f'predicted {fmnist_predicted_classes[sample_idx]}')
ax[row + 1, col].bar(
np.arange(10), fmnist_predicted_probabilities[sample_idx], tick_label=np.arange(10)
)
fig.suptitle('Most confident predictions', size=20)
fig.savefig(os.path.join(output_dir, 'fashionmnist_most_confident.pdf'))
# Plot FashionMNIST samples your model is least confident about
print('Plotting least confident FashionMNIST predictions')
least_confident_indices = fmnist_sorted_confidence_indices[:10]
fig, ax = plt.subplots(4, 5, figsize=(13, 11))
for row in range(0, 4, 2):
for col in range(5):
sample_idx = least_confident_indices[5 * row // 2 + col]
ax[row, col].imshow(np.reshape(fmnist_samples[sample_idx], (28, 28)), cmap='gray')
ax[row, col].set_axis_off()
ax[row + 1, col].set_title(f'predicted {fmnist_predicted_classes[sample_idx]}')
ax[row + 1, col].bar(
np.arange(10), fmnist_predicted_probabilities[sample_idx], tick_label=np.arange(10)
)
fig.suptitle('Least confident predictions', size=20)
fig.savefig(os.path.join(output_dir, 'fashionmnist_least_confident.pdf'))
print('Determining suitability of your model for OOD detection')
all_confidences = np.concatenate([confidences, fmnist_confidences])
dataset_labels = np.concatenate([np.ones_like(confidences), np.zeros_like(fmnist_confidences)])
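        # OOD detection is scored by how well the maximum predictive probability separates
        # in-distribution MNIST (label 1) from out-of-distribution FashionMNIST (label 0);
        # a well-calibrated BNN should be noticeably less confident on FashionMNIST.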
print(
'AUROC for MNIST vs. FashionMNIST OOD detection based on confidence: '
f'{roc_auc_score(dataset_labels, all_confidences):.3f}'
)
print(
'AUPRC for MNIST vs. FashionMNIST OOD detection based on confidence: '
f'{average_precision_score(dataset_labels, all_confidences):.3f}'
)
class DenseNet(nn.Module):
"""
Simple module implementing a feedforward neural network.
You can use this model as a reference/baseline for calibration
in the normal neural network case.
"""
def __init__(self, in_features: int, hidden_features: typing.Tuple[int, ...], out_features: int):
"""
Create a normal NN.
:param in_features: Number of input features
:param hidden_features: Tuple where each entry corresponds to a hidden layer with
the corresponding number of features.
:param out_features: Number of output features
"""
super().__init__()
feature_sizes = (in_features,) + hidden_features + (out_features,)
num_affine_maps = len(feature_sizes) - 1
self.layers = nn.ModuleList([
nn.Linear(feature_sizes[idx], feature_sizes[idx + 1], bias=True)
for idx in range(num_affine_maps)
])
self.activation = nn.ReLU()
def forward(self, x: torch.Tensor) -> torch.Tensor:
current_features = x
for idx, current_layer in enumerate(self.layers):
new_features = current_layer(current_features)
if idx < len(self.layers) - 1:
new_features = self.activation(new_features)
current_features = new_features
return current_features
def predict_probabilities(self, x: torch.Tensor) -> torch.Tensor:
assert x.shape[1] == 28 ** 2
estimated_probability = F.softmax(self.forward(x), dim=1)
assert estimated_probability.shape == (x.shape[0], 10)
return estimated_probability
def main():
"""
raise RuntimeError(
'This main method is for illustrative purposes only and will NEVER be called by the checker!\n'
'The checker always calls run_solution directly.\n'
'Please implement your solution exclusively in the methods and classes mentioned in the task description.'
)
"""
# Load training data
data_dir = os.curdir
output_dir = os.curdir
raw_train_data = np.load(os.path.join(data_dir, 'train_data.npz'))
x_train = torch.from_numpy(raw_train_data['train_x']).reshape([-1, 784])
y_train = torch.from_numpy(raw_train_data['train_y']).long()
dataset_train = torch.utils.data.TensorDataset(x_train, y_train)
# Run actual solution
run_solution(dataset_train, data_dir=data_dir, output_dir=output_dir)
if __name__ == "__main__":
main()