Skip to content

Commit e445a1c

Browse files
committed
Create preprocessing.py
1 parent 422d708 commit e445a1c

File tree

1 file changed

+287
-0
lines changed

1 file changed

+287
-0
lines changed

preprocessing.py

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Fri 20 14:48:49 2020
4+
5+
@author: danish
6+
"""
7+
#Import the necessary libraries
8+
import cv2 # for capturing videos
9+
import os
10+
import math
11+
from tqdm import tqdm
12+
import numpy as np
13+
import glob
14+
from numba import njit
15+
16+
17+
def Frame_Extractor(v_file, path='./', ext='.avi', frames_dir='train_1', extract_rate='all', frames_ext='.jpg'):
    """
    Extract frames from a video file and save them as individual images.

    Frames are written to ``<frames_dir>/<v_file>/<v_file>_frame<N><frames_ext>``,
    where ``N`` counts the saved frames starting from 0. The same naming scheme
    is used for every value of `extract_rate`.

    Parameters
    ----------

    v_file : str
        Name of the video file, without extension.

    path : str, optional
        Directory containing the video file. The default is './' (the current
        working directory).

    ext : str, optional
        Extension of the given video file e.g. `.avi`, `.mp4`. The default is '.avi'.

    frames_dir : str, optional
        Path to the directory where frames will be saved. A sub-directory named
        after `v_file` is created inside it. The default is 'train_1'.

    extract_rate : int or str, optional
        How many frames should be extracted from each second of video. If the
        value is `all`, every frame is saved. If it is a positive integer `n`,
        every ``floor(fps / n)``-th frame is saved, i.e. roughly `n` evenly
        spaced frames per second (for a 25 fps video and `n = 5`, every 5th
        frame). The default is `all`.

    frames_ext : str, optional
        The extension for the extracted frames/images e.g. '.tif' or '.jpg'.
        The default is '.jpg'.

    Raises
    ------

    ValueError
        If `extract_rate` is neither `all` nor a positive integer, or if it is
        greater than the frame rate reported for the video.

    IOError
        If the video file cannot be opened.

    Returns
    -------
    None.

    """
    # Validate `extract_rate` before touching the file system or the video, so
    # that a bad argument fails immediately instead of after the first frame.
    invalid_msg = 'Invalid Value for argument `extract_rate`, it can be either `all` or an integer value.'
    if isinstance(extract_rate, bool) or not isinstance(extract_rate, (int, str)):
        raise ValueError(invalid_msg)
    if isinstance(extract_rate, str):
        if extract_rate != 'all':
            raise ValueError(invalid_msg)
    elif extract_rate < 1:
        raise ValueError('Invalid Value for argument `extract_rate`, an integer value must be at least 1.')

    video_path = os.path.join(path, v_file + ext)
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise IOError('Could not open video file: {0}'.format(video_path))

        frame_rate = cap.get(cv2.CAP_PROP_FPS)

        if extract_rate == 'all':
            step = 1
        else:
            if extract_rate > frame_rate:
                print('Frame rate of Given Video: {0} fps'.format(frame_rate))
                raise ValueError('The value of `extract_rate` argument can not be greater than the Frame Rate of the video.')
            # Save one frame out of every `step`; never less than 1 so the
            # modulo below cannot divide by zero.
            step = max(1, math.floor(frame_rate / extract_rate))

        out_dir = os.path.join(frames_dir, v_file)
        os.makedirs(out_dir, exist_ok=True)

        saved = 0      # number of frames written so far (used in the file name)
        frame_id = 0   # index of the frame about to be decoded
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_id % step == 0:
                filename = os.path.join(out_dir, '{0}_frame{1}{2}'.format(v_file, saved, frames_ext))
                cv2.imwrite(filename, frame)
                saved += 1
            frame_id += 1
    finally:
        # Release the capture even when an error is raised part-way through.
        cap.release()
86+
87+
88+
def Edge_Detector(image, sigma=0.33):
    """
    Apply Canny edge detection with thresholds derived from the image median.

    Parameters
    ----------

    image : uint8
        Image/Frame upon which Canny edge detection will be applied.

    sigma : float, optional
        Fractional width of the threshold band around the median pixel
        intensity: the lower threshold is ``(1 - sigma) * median`` and the
        upper threshold is ``(1 + sigma) * median``, limited to 0 and 255
        respectively. The default is 0.33.

    Returns
    -------

    edged : uint8
        Edge map returned by ``cv2.Canny``.

    """
    # The median of the pixel intensities anchors both thresholds.
    median_intensity = np.median(image)

    # Hysteresis thresholds: a band of +/- sigma around the median.
    low_threshold = int(max(0, (1.0 - sigma) * median_intensity))
    high_threshold = int(min(255, (1.0 + sigma) * median_intensity))

    return cv2.Canny(image, low_threshold, high_threshold)
119+
120+
#@njit
121+
def PreProcessing(img_name, read_path, write_path, canny_edge=True, canny_path=None, sigma=0.33):
    """
    Load an image as grayscale, resize it to 128x128, optionally save its Canny
    edge map, then rescale it to [0, 1], subtract its global mean and save it.

    Output files are named ``<prefix>_<img_name>``, where the prefix is
    `write_path` (processed image) or `canny_path` (edge map). The parent
    directory of each prefix is created if it does not exist.

    Parameters
    ----------

    img_name : string
        Name of the frame/image, including its extension. It is appended to the
        output prefixes to build the output file names.

    read_path : string
        Full path of the image file to read.

    write_path : string
        Path prefix for the processed image, e.g. 'ProcessedImages/Train001'.

    canny_edge : boolean, optional
        True: Applies Canny Edge Function
        False: Doesn't apply Canny Edge Function. The default is True.

    canny_path : string, optional
        Path prefix for the edge image, e.g. 'CannyImages/Train001'. Required
        when `canny_edge` is True. The default is None.

    sigma : float, optional
        Passed to `Edge_Detector`; see its documentation. The default is 0.33.

    Raises
    ------

    ValueError
        This error will occur for following reasons:
            1. `canny_edge` is True but `canny_path` is None.
            2. No image could be read from `read_path`.

    Returns
    -------

    rescaled_image : float32
        The 128x128 image after resizing, division by 255 and subtraction of
        its global mean.

    """
    # Check the arguments first so a bad call fails before any image work.
    if canny_edge and canny_path is None:
        raise ValueError('Invalid value for argument `canny_path`, the value cannot be `None`, when `canny_edge` flag is set to True. Please provide valid path to this argument.')

    load_image = cv2.imread(read_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals a missing/unreadable file by returning None.
    if load_image is None:
        raise ValueError('Could not read an image from `read_path`: {0}'.format(read_path))

    resized_image = cv2.resize(load_image, (128, 128))

    if canny_edge:
        edged = Edge_Detector(resized_image, sigma)
        _ensure_parent_dir(canny_path)
        cv2.imwrite(canny_path + '_' + img_name, edged)

    # Rescale to [0, 1] and centre on the global mean of the image.
    rescaled_image = resized_image.astype('float32')
    rescaled_image /= 255.0
    rescaled_image -= rescaled_image.mean()

    # NOTE(review): the array written here is float32 with negative values;
    # whether it is stored without loss depends on the format selected by the
    # extension of `img_name` - confirm for formats other than '.tif'.
    _ensure_parent_dir(write_path)
    cv2.imwrite(write_path + '_' + img_name, rescaled_image)

    return rescaled_image


def _ensure_parent_dir(file_prefix):
    """Create the directory part of `file_prefix` (all levels) if there is one."""
    parent = os.path.dirname(file_prefix)
    if parent:
        os.makedirs(parent, exist_ok=True)
186+
187+
188+
def ReadFileNames(path):
    """
    Collect the `.tif` images found in each immediate sub-folder of a dataset.

    Sub-folders and the files inside them are returned in sorted order so the
    result does not depend on the order in which the operating system lists
    directory entries.

    Parameters
    ----------
    path : string
        Location of the data set that needs to be preprocessed.

    Returns
    -------
    onlyfiles : list
        One list per sub-folder, containing the paths of its `.tif` files.
    file_names : list
        One list per sub-folder, containing the bare file names (no directory
        part) of the entries in `onlyfiles`, in the same order.
    directories : list
        The names of the sub-folders of `path`, in the same order as the two
        lists above.
    """
    directories = sorted(
        name for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    )
    onlyfiles = []
    file_names = []

    for directory in directories:
        # Escape the folder part so characters such as '[' in a directory name
        # are not interpreted as glob wildcards.
        folder = glob.escape(os.path.join(path, directory))
        files = sorted(glob.glob(os.path.join(folder, '*.tif')))
        onlyfiles.append(files)
        # basename works with whichever path separator the platform uses.
        file_names.append([os.path.basename(file) for file in files])
    return onlyfiles, file_names, directories
218+
219+
def load_data(orig_frames, canned_frames, seq_size = 8):
    '''
    Load the preprocessed images and group them into fixed-size sequences.

    The `.tif` files of both folders are listed in sorted order and paired by
    position (the k-th processed image with the k-th canny image). Each
    sequence holds ``seq_size // 2`` consecutive pairs, stored interleaved as
    original, canny, original, canny, ... Canny images are divided by 255;
    processed images are used as read. Images left over after the last
    complete sequence are ignored.

    Parameters
    ----------
    orig_frames : string
        Name/path of the folder containing original processed images.
    canned_frames : string
        Name/path of the folder containing canny edged images.
    seq_size : integer, optional
        Number of images per sequence, half of them original and half canny.
        Must be at least 2; an odd value is rounded down to the next even
        number. The default is 8 (4 original images, 4 canny images).

    Raises
    ------
    ValueError
        If `seq_size` is smaller than 2, if the two folders do not contain the
        same number of `.tif` files, or if an image cannot be read.

    Returns
    -------
    X : array
        Array built from `lst` with ``np.array``.
    lst : list
        A list with one entry per sequence; each entry is a list of image
        arrays with a trailing channel axis of size 1 added.

    '''
    pairs_per_seq = seq_size // 2
    if pairs_per_seq < 1:
        raise ValueError('`seq_size` must be at least 2 (one original and one canny image per sequence).')

    # Sort both listings so that pairing by position is deterministic; glob
    # itself returns files in arbitrary order.
    processed_imgs = sorted(glob.glob(orig_frames + '/*.tif'))
    cany_imgs = sorted(glob.glob(canned_frames + '/*.tif'))
    if len(processed_imgs) != len(cany_imgs):
        raise ValueError(
            'Found {0} processed images but {1} canny images; both folders must '
            'contain the same number of `.tif` files.'.format(len(processed_imgs), len(cany_imgs)))

    lst = []
    usable = (len(processed_imgs) // pairs_per_seq) * pairs_per_seq
    for start in tqdm(range(0, usable, pairs_per_seq)):
        seq = []
        for j in range(start, start + pairs_per_seq):
            orig_img = cv2.imread(processed_imgs[j], cv2.IMREAD_ANYDEPTH)
            canny_img = cv2.imread(cany_imgs[j], cv2.IMREAD_ANYDEPTH)
            # cv2.imread returns None instead of raising when a read fails.
            if orig_img is None:
                raise ValueError('Could not read image: {0}'.format(processed_imgs[j]))
            if canny_img is None:
                raise ValueError('Could not read image: {0}'.format(cany_imgs[j]))
            seq.append(np.expand_dims(orig_img, axis = 2))
            seq.append(np.expand_dims(canny_img / 255, axis = 2))
        lst.append(seq)
    #A complete array of all the images combined
    X = np.array(lst)
    return X, lst
263+
264+
if __name__=='__main__':
    # Optional first step when starting from a raw video instead of a folder
    # of frames:
    #   Frame_Extractor('video', path='./', ext='.mp4', frames_dir='Extracted_Frames',
    #                   extract_rate='all', frames_ext='.tif')
    #   path = 'Extracted_Frames'
    path = 'UCSDped1/Train'
    onlyfiles, file_names, dirs = ReadFileNames(path)

    # Process every image of every sub-folder; outputs are named
    # '<folder>_<image name>' inside 'ProcessedImages' and 'CannyImages'.
    for folder_idx in tqdm(range(len(onlyfiles))):
        write_path = 'ProcessedImages/'+dirs[folder_idx]
        canny_path = 'CannyImages/'+dirs[folder_idx]
        for img, img_name in zip(onlyfiles[folder_idx], file_names[folder_idx]):
            rescaled_image = PreProcessing(img_name, read_path=img, write_path=write_path,
                                           canny_edge=True, canny_path=canny_path, sigma=0.33)
287+

0 commit comments

Comments
 (0)