forked from songdejia/EAST
-
Notifications
You must be signed in to change notification settings - Fork 0
/
roirotate.py
89 lines (80 loc) · 3.52 KB
/
roirotate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import torch.nn.functional as F
import numpy as np
import cv2
import torch
import visdom
"""
box: 1 * num (boxes in an image) * 4 * 2
img: 1 * 3 * 512 * 512
feature: 1 * 32 * 128 * 128 (1/4 of the image resolution)
output:
rotated_feature: num * w * h * 32
Args:
theta (Tensor): input batch of affine matrices (:math:`N \times 2 \times 3`)
size (torch.Size): the target output image size (:math:`N \times C \times H \times W`)
Example: torch.Size((32, 3, 24, 24))
"""
"""
Args:
input (Tensor): input of shape :math:`(N, C, H_\text{in}, W_\text{in})` (4-D case)
or :math:`(N, C, D_\text{in}, H_\text{in}, W_\text{in})` (5-D case)
grid (Tensor): flow-field of shape :math:`(N, H_\text{out}, W_\text{out}, 2)` (4-D case)
or :math:`(N, D_\text{out}, H_\text{out}, W_\text{out}, 3)` (5-D case)
mode (str): interpolation mode to calculate output values
'bilinear' | 'nearest'. Default: 'bilinear'
padding_mode (str): padding mode for outside grid values
'zeros' | 'border' | 'reflection'. Default: 'zeros'
Returns:
output (Tensor): output Tensor
"""
# Module-level Visdom client; rotate() below calls vis.heatmap() on it to
# plot the input feature map and every cropped / padded region.
# NOTE(review): the client is created at import time, so presumably a Visdom
# server has to be reachable whenever this module is imported -- confirm.
vis = visdom.Visdom()
def rotate(img, rectangle, feature, ht=8):
    """Crop every box out of ``feature`` as an axis-aligned, fixed-height patch.

    For each box an affine transform is built that maps its first three
    corners onto the top-left, top-right and bottom-right corners of the
    output patch. The patch is sampled from ``feature`` with
    ``F.affine_grid`` / ``F.grid_sample``, then right-padded with zeros so
    that all patches share the width of the widest one.

    Args:
        img: Image tensor. Only ``img.shape[2]`` is read, to derive the
            image-to-feature scale (the same ratio is assumed to hold
            horizontally).
        rectangle: Tensor of boxes in image pixel coordinates, indexed as
            ``rectangle[0][k][corner][xy]``. Only the boxes of the first
            batch entry are used, and only corners 0, 1 and 2 of each box
            (expected in clockwise order starting at the top-left).
        feature: 4-D feature map ``(1, C, H, W)`` to sample from. The batch
            size must be 1 because a single affine matrix is applied per crop.
        ht: Height of every output patch.

    Returns:
        A detached CPU tensor of shape ``(num, 1, C, ht, w_max)`` where
        ``w_max`` is the largest patch width; narrower patches are
        zero-padded on the right. When there are no boxes the result has
        shape ``(0, 1, C, ht, 1)``.

    Side effects:
        Plots the input feature map and every crop / padded crop through the
        module-level ``vis`` client, and prints both affine matrices per box.
    """
    # NOTE(review): plots and prints below are debug output kept from the
    # original implementation.
    vis.heatmap(feature[0][0])

    # detach()/cpu() so boxes living on the GPU or tracking gradients work too.
    boxes = rectangle.detach().cpu().numpy()
    channels = feature.shape[1]

    # Image -> feature-map scale, taken from the heights only.
    scale = feature.shape[2] / img.shape[2]

    # Half extents (x, y) of the feature map: dividing feature-space
    # coordinates by these and subtracting 1 maps them onto the [-1, 1] range
    # used by affine_grid / grid_sample. Derived from the tensor itself
    # instead of assuming a 512-px input.
    half_extent = np.array([feature.shape[3] / 2, feature.shape[2] / 2])

    # Target corners in normalised output coordinates; their order must
    # correspond to the order of the source corners (clockwise).
    dst = np.float32([[-1, -1], [1, -1], [1, 1]])
    homogeneous_row = [0, 0, 1]

    crops = []
    w_max = 0
    for rect in boxes[0]:
        scaled = rect * scale  # box position on the feature map
        w = scaled[1][0] - scaled[0][0]
        h = scaled[2][1] - scaled[1][1]
        # Width that keeps the box's aspect ratio once its height becomes ht.
        wt = ht / h * w
        if wt > w_max:
            w_max = wt

        # Source corners in normalised coordinates.
        src = (scaled[:3] / half_extent - 1).astype(np.float32)
        M = cv2.getAffineTransform(src, dst)
        print(M)

        # affine_grid expects the output -> input mapping, i.e. the inverse
        # of M: extend to 3x3, invert, and keep the top two rows.
        M_inv = np.linalg.inv(np.vstack((M, homogeneous_row)))[:2]
        print(M_inv)

        grid = F.affine_grid(
            torch.from_numpy(M_inv[None]),
            torch.Size((1, channels, ht, int(wt) + 1)),
        )
        # Sample on whatever device the feature map lives on.
        crop = F.grid_sample(feature, grid.float().to(feature.device))
        vis.heatmap(crop[0][0])
        crops.append(crop)

    w_max = int(w_max) + 1

    if not crops:
        # torch.stack() rejects an empty list; return an empty batch instead.
        return torch.zeros(0, 1, channels, ht, w_max)

    padded_crops = []
    for crop in crops:
        # Zero-pad on the right of the width axis up to the common width.
        # NOTE(review): detach().cpu() mirrors the original behaviour, so no
        # gradient flows back through the returned tensor -- confirm that
        # this is intended.
        padded = F.pad(crop.detach().cpu(), (0, w_max - crop.shape[3]))
        vis.heatmap(padded[0][0])
        padded_crops.append(padded)

    return torch.stack(padded_crops, 0)