-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathencoderl.py
193 lines (156 loc) · 7.06 KB
/
encoderl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# coding:utf-8
import torch
import math
import itertools
import cv2
import numpy as np
class DataEncoder:
def __init__(self):
'''
compute default boxes
'''
scale = 1024.
sizes = [s / scale for s in (32, 64, 256)] # 当32改为64时,achor与label匹配的正样本数目更多
# anchor's size.In the paper, the first output layer size is (32, 64, 128)
aspect_ratios = ((1,2,4), (1,), (1,)) # more understanding
feature_map_sizes = (32, 16, 8)
density = [[-4,-1,1,4],[-2,2],[0]] # density for output layer1
num_layers = len(feature_map_sizes)
boxes = []
for i in range(num_layers): ### 3 times
fmsize = feature_map_sizes[i]
for h,w in itertools.product(range(fmsize), repeat=2): ### featuresize.pow(2)
cx = (w + 0.5)/feature_map_sizes[i] # every grid value of centerX
cy = (h + 0.5)/feature_map_sizes[i]
s = sizes[i]
for j,ar in enumerate(aspect_ratios[i]): ### 3 , 1, 1
if i == 0:
for dx,dy in itertools.product(density[j], repeat=2): ### 21 = 4*4 + 2*2 + 1
boxes.append((cx+dx/8.*s*ar, cy+dy/8.*s*ar, s*ar, s*ar)) # add the offset
else:
boxes.append((cx, cy, s*ar, s*ar))
self.default_boxes = torch.Tensor(boxes)
def iou(self, box1, box2):
'''Compute the intersection over union of two set of boxes, each box is [x1,y1,x2,y2].
Args:
box1: (tensor) bounding boxes, sized [N,4].
box2: (tensor) bounding boxes, sized [M,4].
Return:
(tensor) iou, sized [N,M].
'''
N = box1.size(0)
M = box2.size(0)
lt = torch.max( # left top
box1[:,:2].unsqueeze(1).expand(N,M,2), # [N,2] -> [N,1,2] -> [N,M,2]
box2[:,:2].unsqueeze(0).expand(N,M,2), # [M,2] -> [1,M,2] -> [N,M,2]
)
rb = torch.min( # right bottom
box1[:,2:].unsqueeze(1).expand(N,M,2), # [N,2] -> [N,1,2] -> [N,M,2]
box2[:,2:].unsqueeze(0).expand(N,M,2), # [M,2] -> [1,M,2] -> [N,M,2]
)
wh = rb - lt # [N,M,2]
wh[wh<0] = 0 # clip at 0
inter = wh[:,:,0] * wh[:,:,1] # [N,M]
area1 = (box1[:,2]-box1[:,0]) * (box1[:,3]-box1[:,1]) # [N,]
area2 = (box2[:,2]-box2[:,0]) * (box2[:,3]-box2[:,1]) # [M,]
area1 = area1.unsqueeze(1).expand_as(inter) # [N,] -> [N,1] -> [N,M]
area2 = area2.unsqueeze(0).expand_as(inter) # [M,] -> [1,M] -> [N,M]
iou = inter / (area1 + area2 - inter)
return iou
def nms(self, bboxes, scores, threshold=0.4):#0.5
'''
bboxes(tensor) [N,4]
scores(tensor) [N,]
'''
x1 = bboxes[:,0]
y1 = bboxes[:,1]
x2 = bboxes[:,2]
y2 = bboxes[:,3]
areas = (x2-x1) * (y2-y1)
_,order = scores.sort(0,descending=True)
keep = []
while order.numel() > 0:
if order.numel() == 1:
i = order.item()
keep.append(i)
break
else:
i = order[0].item()
keep.append(i)
xx1 = x1[order[1:]].clamp(min=x1[i])
yy1 = y1[order[1:]].clamp(min=y1[i])
xx2 = x2[order[1:]].clamp(max=x2[i])
yy2 = y2[order[1:]].clamp(max=y2[i])
w = (xx2-xx1).clamp(min=0)
h = (yy2-yy1).clamp(min=0)
inter = w*h
ovr = inter / (areas[i] + areas[order[1:]] - inter)
ids = (ovr<=threshold).nonzero().squeeze()
if ids.numel() == 0:
break
order = order[ids+1]
return torch.LongTensor(keep)
def encode(self, boxes, classes, threshold=0.3):#0.35
'''
boxes:[num_obj, 4]
default_box (x1,y1,w,h)
return:boxes: (tensor) [num_obj,21824,4]
classes:class label [obj,]
'''
boxes_org = boxes
default_boxes = self.default_boxes # [21824,4]
iou = self.iou(
boxes,
torch.cat([default_boxes[:, :2] - default_boxes[:, 2:] / 2, # (x0,y0,w,h) => (x1,y1,x2,y2)
default_boxes[:, :2] + default_boxes[:, 2:] / 2], 1)) # iou_size = (num_obj, 21824)
# find max iou of each face in default_box
max_iou, max_iou_index = iou.max(1)
# find max iou of each default_box in faces
iou, max_index = iou.max(0)
# ensure every face have a default box,I think is no use.
# max_index[max_iou_index] = torch.LongTensor(range(num_obj))
boxes = boxes[max_index] # [21824,4] 是图像label, use conf to control is or not.
variances = [0.1, 0.2]
cxcy = (boxes[:, :2] + boxes[:, 2:]) / 2 - default_boxes[:, :2] # [21824,2]
cxcy /= variances[0] * default_boxes[:, 2:]
wh = (boxes[:, 2:] - boxes[:, :2]) / default_boxes[:, 2:] # [21824,2] 为什么会出现0宽度??
wh = torch.log(wh) / variances[1] # Variable
inf_flag = wh.abs() > 10000
if (inf_flag.sum().item() is not 0):
print('inf_flag has true', wh, boxes)
print('org_boxes', boxes_org)
print('max_iou', max_iou, 'max_iou_index', max_iou_index)
raise 'inf error'
loc = torch.cat([cxcy, wh], 1) # [21824,4]
conf = classes[max_index] # 其实都是1 [21824,]
conf[iou < threshold] = 0 # iou小的设为背景
#conf[max_iou_index] = 1 # 这么设置有问题,loc loss 会导致有inf loss,从而干扰训练,
# 去掉后,损失降的更稳定些,是因为widerFace数据集里有的label
# 做的宽度为0,但是没有被滤掉,是因为max(1)必须为每一个object选择一个
# 与之对应的default_box,需要修改数据集里的label。
return loc, conf
def decode(self, loc, conf):
'''
將预测出的 loc/conf转换成真实的人脸框
loc [21842,4]
conf [21824,2]
'''
#variances = torch.Tensor().cuda()
#print (variances)
variances= [0.1, 0.2]
cxcy = loc[:,:2] * variances[0] * self.default_boxes[:,2:] + self.default_boxes[:,:2]
wh = torch.exp(loc[:,2:] * variances[1]) * self.default_boxes[:,2:]
boxes = torch.cat([cxcy-wh/2,cxcy+wh/2],1) #[21824,4]
# conf[:,0] means no face
# conf[:,1] means face
# filter face by a value of 0.4
#conf[:,0] = 0.4
max_conf, labels = conf.max(1) #[21842,1]
if labels.long().sum().item() is 0:
# no face in image
return torch.tensor([0]), torch.tensor([0]), torch.tensor([0])
ids = labels.nonzero().squeeze(1)
# print('ids', ids)
# print('boxes', boxes.size(), boxes[ids])
keep = self.nms(boxes[ids],max_conf[ids])
return boxes[ids][keep], labels[ids][keep], max_conf[ids][keep]