-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathdemo_show.py
180 lines (138 loc) · 5.59 KB
/
demo_show.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import numpy as np
import time
import cv2
import torch
from torch.autograd import Variable
import lib.utils.utils as utils
import lib.models.crnn as crnn
import lib.config.alphabets as alphabets
import yaml
from easydict import EasyDict as edict
import os
import random
def deal_cfg(path_cfg):
    """Load a YAML config file and derive alphabet-dependent fields.

    Args:
        path_cfg: path to the YAML configuration file.

    Returns:
        EasyDict config with DATASET.ALPHABETS set to the project alphabet
        and MODEL.NUM_CLASSES set to its length.
    """
    with open(path_cfg, 'r') as f:
        # yaml.load without an explicit Loader is deprecated and unsafe;
        # safe_load never constructs arbitrary Python objects.
        config = yaml.safe_load(f)
    config = edict(config)
    config.DATASET.ALPHABETS = alphabets.alphabet
    config.MODEL.NUM_CLASSES = len(config.DATASET.ALPHABETS)
    return config
def recognition(config, img, model, converter, device):
    """Run CRNN inference on a single image and decode the text.

    See: https://github.com/Sierkinhane/CRNN_Chinese_Characters_Rec/issues/211

    Args:
        config: config providing DATASET.MEAN / DATASET.STD normalization stats.
        img: H x W x C image (numpy), already resized to the model input size.
        model: CRNN network whose forward returns (T, B, num_classes) scores.
        converter: label converter with decode(preds, sizes, raw) method.
        device: torch device (or device string) to run inference on.

    Returns:
        The decoded text string.
    """
    # Normalize to zero-mean / unit-std, then move channels first (C, H, W).
    img = img.astype(np.float32)
    img = (img / 255. - config.DATASET.MEAN) / config.DATASET.STD
    img = img.transpose([2, 0, 1])
    img = torch.from_numpy(img).type(torch.FloatTensor).to(device)
    img = img.view(1, *img.size())  # add the batch dimension

    model.eval()
    # Inference only: no_grad avoids building the autograd graph.
    with torch.no_grad():
        preds = model(img)
    print("preds size=", preds.shape)

    # Greedy (best-path) decoding: argmax over the class dimension, then
    # flatten the (T, B) index matrix into one sequence for the converter.
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    # torch.autograd.Variable is deprecated; a plain tensor works the same.
    preds_size = torch.IntTensor([preds.size(0)])
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    return sim_pred
def LstmImgStandardization(img, ratio, stand_w, stand_h):
    """Pad an image with white to an aspect ratio of at least `ratio` (w/h),
    then resize it to (stand_w, stand_h).

    Args:
        img: H x W x 3 uint8 image.
        ratio: minimum width/height ratio before resizing.
        stand_w: target output width.
        stand_h: target output height.

    Returns:
        The (stand_h, stand_w, 3) standardized image, or None for degenerate
        inputs smaller than 2 px in either dimension.
    """
    img_h, img_w, _ = img.shape
    if img_h < 2 or img_w < 2:
        return None  # explicit: callers must handle degenerate input
    # Already at the target size: nothing to do.
    # (Was hard-coded to 32x320, which silently skipped resizing for any
    # other stand_w/stand_h.)
    if img_h == stand_h and img_w == stand_w:
        return img
    ratio_now = img_w * 1.0 / img_h
    if ratio_now <= ratio:
        # Too narrow: pad on the right with white up to the target ratio.
        mask = np.ones((img_h, int(img_h * ratio), 3), dtype=np.uint8) * 255
        mask[0:img_h, 0:img_w, :] = img
    else:
        # Too wide: pad at the bottom with white up to the target ratio.
        mask = np.ones((int(img_w * 1.0 / ratio), img_w, 3), dtype=np.uint8) * 255
        mask[0:img_h, 0:img_w, :] = img
    mask_stand = cv2.resize(mask, (stand_w, stand_h), interpolation=cv2.INTER_AREA)
    return mask_stand
suffix_list = [".jpg",".jpeg",".png",".JPG",".JPEG",".PNG"]

def get_file_path_list(path):
    """Recursively collect image file paths under `path`.

    A file qualifies when its extension is in `suffix_list`. Each match is
    printed with a running count (demo-style progress output).

    Args:
        path: root directory to walk.

    Returns:
        List of full paths to matching image files.
    """
    cnt_ = 0
    imagePathList = []
    # `_dirs` (unused) avoids shadowing the `dir` builtin.
    for root, _dirs, files in os.walk(path):
        for file_name in files:
            full_path = os.path.join(root, file_name)
            # splitext keeps the dot (".jpg"); it is "" when there is no
            # extension, so extension-less files are skipped naturally.
            _, ext = os.path.splitext(full_path)
            if ext in suffix_list:
                cnt_ += 1
                print(cnt_, " :: ", full_path)
                imagePathList.append(full_path)
    print("=====end get_file_path_list============================\n")
    return imagePathList
def get_label(img_path):
    """Extract the ground-truth label embedded in a filename.

    The label is the text between the last '_' and the last '.' in the path,
    with '@' mapped back to '/' (used to encode slashes in filenames).

    Args:
        img_path: image file path like ".../img_<label>.jpg".

    Returns:
        The decoded label string.
    """
    dot_idx = img_path.rfind(".")
    underscore_idx = img_path.rfind("_")
    raw_label = img_path[underscore_idx + 1:dot_idx]
    return raw_label.replace("@", "/")
if __name__ == '__main__':
    # Demo driver: load config + checkpoint, run recognition on every image
    # under path_img_dir, compare against the label embedded in the filename,
    # and show each image until a key is pressed.
    path_cfg = "./lib/config/OWN_config.yaml"
    path_pth = "./checkpoint_54_acc_0.9833.pth"
    path_img_dir = "./myfile/data_sample/val/2/"

    print("cuda?", torch.cuda.is_available())
    config = deal_cfg(path_cfg)
    device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

    model = crnn.get_crnn(config).to(device)
    print('loading pretrained model from {0}'.format(path_pth))
    # map_location keeps CPU-only machines working when the checkpoint was
    # saved from a GPU run.
    checkpoint = torch.load(path_pth, map_location=device)
    if 'state_dict' in checkpoint.keys():
        model.load_state_dict(checkpoint['state_dict'])
    else:
        model.load_state_dict(checkpoint)

    # The converter depends only on the alphabet: build it once, not per image.
    converter = utils.strLabelConverter(config.DATASET.ALPHABETS)

    list_path_img = get_file_path_list(path_img_dir)
    random.shuffle(list_path_img)

    cnt_all = 0
    cnt_right = 0
    for cnt, path_img in enumerate(list_path_img):
        print(cnt, path_img)
        started = time.time()
        img_src = cv2.imread(path_img)
        if img_src is None:
            continue  # unreadable/corrupt file: skip, do not count
        cnt_all += 1
        img = LstmImgStandardization(img_src, 10, config.MODEL.IMAGE_SIZE.W, config.MODEL.IMAGE_SIZE.H)
        rec = recognition(config, img, model, converter, device)
        finished = time.time()
        print('elapsed time: {0}'.format(finished - started))

        ans = get_label(path_img)
        if ans == rec:
            cnt_right += 1
        print("cnt_right=", cnt_right)
        print("cnt_all=", cnt_all)
        print("ratio=", cnt_right * 1.0 / cnt_all)
        print("ans=", ans)
        print("rec=", rec, "\n")
        cv2.imshow("src", img_src)
        cv2.waitKey(0)

    print("============end===========================================")
    print("end:: cnt_right=", cnt_right)
    print("end:: cnt_all=", cnt_all)
    # Guard the final ratio against an empty/unreadable image directory.
    print("end:: ratio=", cnt_right * 1.0 / cnt_all if cnt_all else 0.0)