Spatial&Attn_Block.py
import torch
import torch.nn as nn
'''
Spatial and attention blocks from the paper
`Spatiotemporal Convolutional LSTM for Radar Echo Extrapolation`.
'''


class SpatialBlock(nn.Module):
    def __init__(self, in_channels, fusion_types='channel_add'):
        super(SpatialBlock, self).__init__()
        # fusion_types selects how x_t, c_t, m_t are fused
        assert fusion_types in ['channel_add', 'channel_mul']
        self.fusion_types = fusion_types
        self.in_channels = in_channels
        # spatial attention / context modeling: mix the channel-wise
        # average and maximum maps into a single-channel mask
        self.conv_pool = nn.Conv2d(in_channels=2, out_channels=1, kernel_size=7, stride=1, padding=3)
        self.sigmoid = nn.Sigmoid()
        # transform
        self.trans = nn.Conv2d(self.in_channels, self.in_channels, kernel_size=1)

    def forward(self, x_t, c_t, m_t):
        # fuse the input, cell, and memory states
        if self.fusion_types == 'channel_add':
            x_c_m_t = x_t + c_t + m_t
        else:
            x_c_m_t = x_t * c_t * m_t
        # spatial attention: channel-wise average & max pooling, then a 7x7 mix
        avgout = torch.mean(x_c_m_t, dim=1, keepdim=True)
        maxout, _ = torch.max(x_c_m_t, dim=1, keepdim=True)
        avg_max = torch.cat([avgout, maxout], dim=1)
        context_mask = self.conv_pool(avg_max)
        context_mask = self.sigmoid(context_mask)
        context = context_mask * x_c_m_t
        # transform
        mask = self.trans(context)
        return mask


class AttnBlock(nn.Module):
    def __init__(self, in_channels, height, width, fusion_types='channel_add'):
        super(AttnBlock, self).__init__()
        # fusion_types selects how x_t, c_t, m_t are fused
        assert fusion_types in ['channel_add', 'channel_mul']
        self.fusion_types = fusion_types
        self.in_channels = in_channels
        self.width = width
        self.height = height
        # mix the channel-wise average & maximum maps
        self.conv_pool = nn.Sequential(
            nn.Conv2d(in_channels=2, out_channels=1, kernel_size=7, stride=1, padding=3),
            nn.LayerNorm([1, self.height, self.width]),
            nn.ReLU(inplace=True)
        )
        # spatial attention / context modeling
        self.linear = nn.Conv2d(1, 1, kernel_size=1, stride=1, padding=0)
        # softmax over spatial positions (the map is flattened in forward);
        # a softmax over the singleton channel dim would always return 1
        self.softmax = nn.Softmax(dim=-1)
        # transform
        self.trans = nn.Conv2d(self.in_channels, self.in_channels, kernel_size=1)

    def forward(self, x_t, c_t, m_t):
        # fuse the input, cell, and memory states
        if self.fusion_types == 'channel_add':
            x_c_m_t = x_t + c_t + m_t
        else:
            x_c_m_t = x_t * c_t * m_t
        # channel-wise average & max pooling, then mix into one feature map
        avgout = torch.mean(x_c_m_t, dim=1, keepdim=True)
        maxout, _ = torch.max(x_c_m_t, dim=1, keepdim=True)
        avg_max = torch.cat([avgout, maxout], dim=1)
        feature_map = self.conv_pool(avg_max)
        # attention over spatial positions
        query_context = self.linear(feature_map)
        b, _, h, w = query_context.shape
        attn = self.softmax(query_context.view(b, 1, h * w)).view(b, 1, h, w)
        context = attn * x_c_m_t
        # transform
        mask = self.trans(context)
        return mask
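

# A minimal usage sketch, not from the paper: one way the two masks might be
# combined inside a recurrent cell. `GatedFusionCell`, the additive mask
# combination, and the residual gating below are illustrative assumptions.
class GatedFusionCell(nn.Module):
    def __init__(self, in_channels, height, width):
        super(GatedFusionCell, self).__init__()
        self.spatial = SpatialBlock(in_channels)
        self.attn = AttnBlock(in_channels, height, width)

    def forward(self, x_t, c_t, m_t):
        # combine both masks and apply them as a residual correction to x_t
        mask = self.spatial(x_t, c_t, m_t) + self.attn(x_t, c_t, m_t)
        return x_t + mask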


if __name__ == '__main__':
    model = SpatialBlock(16)
    print(model)
    x = torch.randn(1, 16, 64, 64)
    out = model(x, x, x)
    print(out.shape)

    attn = AttnBlock(16, 64, 64)
    print(attn)
    out = attn(x, x, x)
    print(out.shape)
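
    # exercise the illustrative GatedFusionCell sketch defined above
    cell = GatedFusionCell(16, 64, 64)
    print(cell(x, x, x).shape)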