-
Notifications
You must be signed in to change notification settings - Fork 1
/
main_gui.py
247 lines (207 loc) · 10.2 KB
/
main_gui.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import sys
import cv2 as cv
import numpy as np
from PyQt5.QtWidgets import (QApplication, QFileDialog, QColorDialog,
QMainWindow)
from gui import *
from lib.utils import get_image_view, OcrReader, check_dir
from lib.threads import TimelineThread, GenSubThread
from lib.core.split import if_srt_frame
class MyWindow(QMainWindow, Ui_VideoOCR):
# 初始化
def __init__(self, parent=None):
super(MyWindow, self).__init__(parent)
self.setupUi(self)
self.videoView.my_scene.setSceneRect(0, 0, 544, 306)
self.clipView.my_scene.setSceneRect(0, 0, 920, 90)
# 注册监听
# 打开文件的项
self.openFile.triggered.connect(self.open_file)
self.videoBar.sliderMoved.connect(self.video_bar)
self.videoBar.sliderReleased.connect(self.video_bar)
# 参数配置项
self.colorButton1.clicked.connect(self.upper_color)
self.colorButton2.clicked.connect(self.lower_color)
# 生成时间轴的项
self.testButton.clicked.connect(self.test_seg)
self.genTime.clicked.connect(self.gen_time)
# 生成字幕的项
self.genSub.clicked.connect(self.gen_sub)
# 初始化实例变量
self.video_path = None
self.video = None
self.video_total_frame = 0
self.frame_idx = 0
self.frame = None
self.hasOpen = False
check_dir()
def open_file(self):
"""
打开视频,并显示第一帧
"""
video_name, video_type = QFileDialog.getOpenFileName(self, "打开视频", "", "*.mp4;;*.mkv;;*.avi;;*.rmvb")
if not video_name:
return
if not video_type not in ['.mp4', '.mkv', '.avi', '.rmvb']:
raise ValueError("Not a video file:{}".format(video_name))
print(video_name, video_type)
self.video_path = video_name
self.video = cv.VideoCapture(video_name)
ret, self.frame = self.video.read()
if not ret:
raise ValueError("An empty video:{}".format(video_name))
self.video_total_frame = self.video.get(7)
# 缩放图像为适应窗口的大小
# 获得缩放比例
if self.videoView.old_img_item:
self.videoView.my_scene.removeItem(self.videoView.old_img_item)
self.videoView.old_img_item = get_image_view(self.videoView.my_scene, self.videoView, self.frame)
if self.videoView.old_rect_item:
self.videoView.my_scene.removeItem(self.videoView.old_rect_item)
self.videoView.my_scene.addItem(self.videoView.old_rect_item)
self.videoView.setScene(self.videoView.my_scene)
self.hasOpen = True
self.videoView.setStyleSheet("background:transparent;padding:0px;border:0px")
def video_bar(self):
"""
通过 slider 所处位置确定
"""
if not self.hasOpen:
return
bar_min = self.videoBar.minimum()
bar_max = self.videoBar.maximum()
bar_pos = self.videoBar.value()
bar_ratio = (bar_pos - bar_min) / (bar_max - bar_min)
self.frame_idx = min(self.video_total_frame - 1, int(self.video_total_frame * bar_ratio))
self.frame_idx = max(0, self.frame_idx)
self.video.set(cv.CAP_PROP_POS_FRAMES, self.frame_idx) # 设置要获取的帧号
_, self.frame = self.video.read()
if self.videoView.old_img_item:
self.videoView.my_scene.removeItem(self.videoView.old_img_item)
self.videoView.old_img_item = get_image_view(self.videoView.my_scene, self.videoView, self.frame)
if self.videoView.old_rect_item:
self.videoView.my_scene.removeItem(self.videoView.old_rect_item)
self.videoView.my_scene.addItem(self.videoView.old_rect_item)
self.videoView.setScene(self.videoView.my_scene)
def upper_color(self):
col = QColorDialog.getColor()
if col.isValid():
color_type = self.segMethod.itemText(self.segMethod.currentIndex())
if color_type == "RGB":
upper_str = '{},{},{}'.format(col.red(), col.green(), col.blue())
# 将选择的颜色输出在输入框.
self.maxValue.setText(upper_str)
else:
upper_str = '{},{},{}'.format(col.hue(), col.saturation(), col.value())
# 将选择的颜色输出在输入框.
self.maxValue.setText(upper_str)
def lower_color(self):
col = QColorDialog.getColor()
if col.isValid():
color_type = self.segMethod.itemText(self.segMethod.currentIndex())
if color_type == "RGB":
lower_str = '{},{},{}'.format(col.red(), col.green(), col.blue())
# 将选择的颜色输出在输入框.
self.minValue.setText(lower_str)
else:
lower_str = '{},{},{}'.format(col.hue(), col.saturation(), col.value())
# 将选择的颜色输出在输入框.
self.minValue.setText(lower_str)
def test_seg(self):
if not self.hasOpen:
return
rec_pos = self.videoView.rect.getRect()
if sum(rec_pos) == 0:
return
view_height = self.videoView.my_scene.height()
view_width = self.videoView.my_scene.width()
frame_height = self.frame.shape[0]
frame_width = self.frame.shape[1]
vid_x = self.videoView.old_img_item.x()
vid_y = self.videoView.old_img_item.y()
h_ratio = frame_height / (view_height - 2 * vid_y)
w_ratio = frame_width / (view_width - 2 * vid_x)
box = [[int((rec_pos[1]-vid_y)*h_ratio), int((rec_pos[1]+rec_pos[3]-vid_y)*h_ratio)],
[int((rec_pos[0]-vid_x)*w_ratio), int((rec_pos[0]+rec_pos[2]-vid_x)*w_ratio)]]
srt_prob_thres = float(self.srtThres.text())
seg_method = self.segMethod.itemText(self.segMethod.currentIndex())
upper_value = self.maxValue.text()
lower_value = self.minValue.text()
upper_values = np.array([int(i) for i in upper_value.split(',')])
lower_values = np.array([int(i) for i in lower_value.split(',')])
clipped_frame = self.frame[box[0][0]:box[0][1], box[1][0]:box[1][1]]
frame_seg, has_srt = if_srt_frame(clipped_frame, upper_values, lower_values, seg_method, srt_prob_thres)
# 缩放图像为适应窗口的大小
# 获得缩放比例
self.clipView.my_scene.clear()
self.clipView.old_img_item = get_image_view(self.clipView.my_scene, self.clipView, frame_seg)
self.clipView.setScene(self.clipView.my_scene)
self.clipView.setStyleSheet("background:transparent;padding:0px;border:0px")
if has_srt:
self.isSrt.setText('检测结果: 该帧包含字幕')
else:
self.isSrt.setText('检测结果: 该帧不包含字幕')
def gen_time(self):
if not self.hasOpen:
return
save_frames = self.saveFrames.isChecked()
seg_method = self.segMethod.itemText(self.segMethod.currentIndex())
upper_value = self.maxValue.text()
lower_value = self.minValue.text()
upper_values = np.array([int(i) for i in upper_value.split(',')])
lower_values = np.array([int(i) for i in lower_value.split(',')])
srt_prob_thres = float(self.srtThres.text())
change_prob_thres = float(self.chgThres.text())
# lang = 'ch_sim'
lang_box_text = self.langBox.itemText(self.langBox.currentIndex())
lang = [lang_box_text] if lang_box_text != "dual" else ['ch_sim', 'en']
rec_pos = self.videoView.rect.getRect()
if sum(rec_pos) == 0:
return
view_height = self.videoView.my_scene.height()
view_width = self.videoView.my_scene.width()
frame_height = self.frame.shape[0]
frame_width = self.frame.shape[1]
vid_x = self.videoView.old_img_item.x()
vid_y = self.videoView.old_img_item.y()
h_ratio = frame_height / (view_height - 2 * vid_y)
w_ratio = frame_width / (view_width - 2 * vid_x)
box = [[int((rec_pos[1] - vid_y) * h_ratio), int((rec_pos[1] + rec_pos[3] - vid_y) * h_ratio)],
[int((rec_pos[0] - vid_x) * w_ratio), int((rec_pos[0] + rec_pos[2] - vid_x) * w_ratio)]]
# split_vision(self.video, self.video_path, upper_values, lower_values, seg_method, box,
# self.progressBar, lang, srt_prob_thres, change_prob_thres, save_frames)
split_thread = TimelineThread(self.video, self.video_path, upper_values, lower_values, seg_method, box,
self, self.progressBar, lang, srt_prob_thres, change_prob_thres, save_frames)
split_thread.run()
def gen_sub(self):
if not self.hasOpen:
return
rec_pos = self.videoView.rect.getRect()
if sum(rec_pos) == 0:
return
view_height = self.videoView.my_scene.height()
view_width = self.videoView.my_scene.width()
frame_height = self.frame.shape[0]
frame_width = self.frame.shape[1]
vid_x = self.videoView.old_img_item.x()
vid_y = self.videoView.old_img_item.y()
h_ratio = frame_height / (view_height - 2 * vid_y)
w_ratio = frame_width / (view_width - 2 * vid_x)
box = [[int((rec_pos[1] - vid_y) * h_ratio), int((rec_pos[1] + rec_pos[3] - vid_y) * h_ratio)],
[int((rec_pos[0] - vid_x) * w_ratio), int((rec_pos[0] + rec_pos[2] - vid_x) * w_ratio)]]
ocr_method = self.ocrMethod.itemText(self.ocrMethod.currentIndex())
# lang = ['ch_sim']
lang_box_text = self.langBox.itemText(self.langBox.currentIndex())
lang = [lang_box_text] if lang_box_text != "dual" else ['ch_sim', 'en']
# TODO: 自动转成各种OCR需要的缩写
ocr_reader = OcrReader(ocr_method, lang)
ass_path = "output/split_vision.ass"
# ocr_with_timeline(self.video, box, ocr_reader, ass_path, lang, self.progressBar)
gen_sub_thread = GenSubThread(self.video, self.video_path, box,
ocr_reader, lang, self, self.progressBar)
gen_sub_thread.run()
if __name__ == '__main__':
app = QApplication(sys.argv)
myWin = MyWindow()
myWin.show()
sys.exit(app.exec_())