-
Notifications
You must be signed in to change notification settings - Fork 11
/
model.py
131 lines (116 loc) · 4.82 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
##################################################
# Imports
##################################################
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, transforms
from torchvision.utils import make_grid
from torch.optim import Adam
import pytorch_lightning as pl
import numpy as np
# Custom
from metrics import meanIoU
from dataloader import Denorm
##################################################
# Hands Segmentor
##################################################
class HandSegModel(pl.LightningModule):
    """
    Two-class segmentation model wrapping torchvision's DeepLabV3 (ResNet-50).

    The wrapped network is built with ``num_classes=2`` and its first
    convolution is adapted so that it accepts 1, 3 or 4 input channels.
    Training, validation and test steps log a BCE-with-logits loss and a
    mean IoU; validation and test additionally log an image grid of the
    first batch to the trainer's logger.
    """

    def __init__(self, pretrained=False, lr=1e-4, in_channels=3):
        """
        Build the network and, for pretrained models, the de-normalizer.

        Args:
            pretrained: when True, initialise from torchvision's pretrained
                21-class DeepLabV3 wherever tensor shapes match.
            lr: learning rate handed to Adam in ``configure_optimizers``.
            in_channels: number of input channels; must be 1, 3 or 4.
        """
        super().__init__()
        assert in_channels in [1, 3, 4], 'in_channels must be 1, 3 or 4'
        self.deeplab = self._get_deeplab(pretrained=pretrained, num_classes=2, in_channels=in_channels)
        self.denorm_image_for_tb_log = None  # For tensorboard logging
        self.lr = lr
        if pretrained:
            # Per-channel statistics used to undo input normalisation before
            # images are logged. The first three entries of the 3/4-channel
            # variants are the usual ImageNet values; 0.5 is used for the
            # remaining channel.
            if in_channels == 1:
                mean, std = np.array([0.5]), np.array([0.5])
            elif in_channels == 3:
                mean, std = np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225])
            elif in_channels == 4:
                mean, std = np.array([0.485, 0.456, 0.406, 0.5]), np.array([0.229, 0.224, 0.225, 0.5])
            self.denorm_image_for_tb_log = Denorm(mean, std)

    def _get_deeplab(self, pretrained=False, num_classes=2, in_channels=3):
        """
        Get the PyTorch DeepLab model architecture.

        Args:
            pretrained: when True, copy every tensor of the pretrained
                21-class model whose name and shape match into the new model.
            num_classes: number of output channels of the segmentation head.
            in_channels: 1, 3 or 4; any other value leaves ``conv1`` as built.

        Returns:
            The DeepLabV3 module with its first convolution adapted to
            ``in_channels``.
        """
        deeplab = models.segmentation.deeplabv3_resnet50(
            pretrained=False,
            num_classes=num_classes
        )
        if pretrained:
            deeplab_21 = models.segmentation.deeplabv3_resnet50(
                pretrained=True,
                progress=True,
                num_classes=21
            )
            # Transfer through the state dict rather than ``parameters()``:
            # the state dict also contains buffers (BatchNorm running
            # statistics), which a parameter-only copy silently leaves at
            # their initial values. Entries are matched by name, and those
            # whose shape differs (e.g. the 21-class output layer) or that do
            # not exist in the target are skipped.
            target_state = deeplab.state_dict()
            compatible = {
                name: tensor
                for name, tensor in deeplab_21.state_dict().items()
                if name in target_state and tensor.shape == target_state[name].shape
            }
            deeplab.load_state_dict(compatible, strict=False)

        conv1 = deeplab.backbone.conv1
        if in_channels == 1:
            # Keep only the filters of the first input channel.
            conv1.weight = nn.Parameter(conv1.weight.data[:, 0:1])
            conv1.in_channels = in_channels  # keep module metadata in sync
        elif in_channels == 4:
            weight = conv1.weight
            C, _, H, W = weight.shape
            # Append one extra input channel initialised with small noise.
            conv1.weight = nn.Parameter(torch.cat([
                weight.data,
                torch.randn(C, 1, H, W, device=weight.device) * 0.1,
            ], 1))
            conv1.in_channels = in_channels  # keep module metadata in sync
        return deeplab

    def forward(self, x):
        """Return the ``'out'`` entry of the DeepLab output dict for ``x``."""
        return self.deeplab(x)['out']

    def _shared_step(self, batch, stage):
        """
        Run the forward pass and log the loss and mIoU for one batch.

        Args:
            batch: an ``(x, y)`` pair. ``y`` is passed to
                ``binary_cross_entropy_with_logits`` together with the
                logits and reduced with ``argmax(1)`` for the IoU, so it is
                presumably a float one-hot mask shaped like the logits --
                confirm against the dataloader.
            stage: prefix for the logged metric names
                (``'train'``, ``'validation'`` or ``'test'``).

        Returns:
            Tuple ``(x, y_hat, loss)`` where ``y_hat`` is the detached
            softmax of the logits over dim 1.
        """
        x, y = batch
        logits = self(x)
        loss = F.binary_cross_entropy_with_logits(logits, y)
        y_hat = F.softmax(logits, 1).detach()
        miou = meanIoU(y_hat, y.argmax(1))
        # Cache
        self.log(f'{stage}_bce', loss, prog_bar=True)
        self.log(f'{stage}_mIoU', miou, prog_bar=True)
        return x, y_hat, loss

    def _log_images(self, x, y_hat, stage):
        """
        Log a 4-per-row grid of up to 16 inputs and predictions.

        Does nothing when the trainer has no logger (``logger=False``), so
        evaluation does not crash in that configuration.
        """
        logger = getattr(self.trainer, 'logger', None)
        if logger is None:
            return
        tb_log = logger.experiment
        if self.denorm_image_for_tb_log is not None:
            x = self.denorm_image_for_tb_log(x)
        x_grid = make_grid(x[:16], nrow=4)
        # make_grid expands single-channel input to three identical channels;
        # keep just one of them for the predicted class map.
        y_hat_grid = make_grid(y_hat[:16].argmax(1).unsqueeze(1), nrow=4)[0:1]
        tb_log.add_image(f'{stage}_images', x_grid.cpu().numpy())
        tb_log.add_image(f'{stage}_preds', y_hat_grid.cpu().numpy())

    def training_step(self, batch, idx_batch):
        """Compute and log ``train_bce`` / ``train_mIoU``; return the loss."""
        _, _, loss = self._shared_step(batch, 'train')
        return loss

    def validation_step(self, batch, idx_batch):
        """
        Compute and log ``validation_bce`` / ``validation_mIoU``.

        For the first batch (``idx_batch == 0``) the inputs and predictions
        are also logged as image grids. Returns the loss.
        """
        x, y_hat, loss = self._shared_step(batch, 'validation')
        if idx_batch == 0:
            self._log_images(x, y_hat, 'validation')
        return loss

    def test_step(self, batch, idx_batch):
        """
        Compute and log ``test_bce`` / ``test_mIoU``.

        For the first batch (``idx_batch == 0``) the inputs and predictions
        are also logged as image grids. Returns the loss.
        """
        x, y_hat, loss = self._shared_step(batch, 'test')
        if idx_batch == 0:
            self._log_images(x, y_hat, 'test')
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with ``self.lr``."""
        return Adam(self.parameters(), lr=self.lr)

    def set_denorm_fn(self, denorm_fn):
        """Replace the callable used to de-normalize images before logging."""
        self.denorm_image_for_tb_log = denorm_fn