-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_create.py
152 lines (130 loc) · 3.79 KB
/
data_create.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# -*- coding: utf-8 -*-
"""data.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1CZB42-sVXTCcYH8dirg_y9vzMsXDmzdP
#import
"""
from google.colab import drive
drive.mount('/content/drive')
!pip install pytube
from pytube import YouTube
import pandas as pd
import numpy as np
from google.colab.patches import cv2_imshow
import cv2
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
import re
import pickle
"""#dataframe"""
def modifying_df(df):
    """Annotate each row of ``df`` with stream metadata looked up via pytube.

    For every row a watch URL is built from the ``youtube_id`` column and the
    progressive mp4 streams of that video are queried.  On success three
    columns are written in place on ``df``:

    * ``frames``  - the ``fps`` attribute of the highest-resolution stream
    * ``highres`` - the digits of that stream's ``resolution`` string
    * ``lowres``  - the digits of the lowest-resolution stream's ``resolution``

    A row whose lookup fails is reported with ``'in except'`` and skipped, so
    it may keep missing values in some or all of the three columns.

    Args:
        df: DataFrame with a ``youtube_id`` column of strings.

    Returns:
        A ``(df, v)`` tuple: the same DataFrame object that was passed in and
        a list that is always empty (kept so callers can keep unpacking two
        values).
    """
    v = []
    # Iterate over the real index labels: ``.loc`` is label based, so this
    # also works when the index is not a fresh 0..n-1 range.
    for i in df.index:
        youtube_video_url = 'https://www.youtube.com/watch?v=' + df.loc[i, 'youtube_id']
        try:
            yt_obj = YouTube(youtube_video_url)
            filters = yt_obj.streams.filter(progressive=True, file_extension='mp4')
            a = filters.get_highest_resolution()
            df.loc[i, 'frames'] = a.fps
            # Strip every non-digit character from the resolution string.
            df.loc[i, 'highres'] = re.sub(r'\D', '', a.resolution)
            b = filters.get_lowest_resolution()
            df.loc[i, 'lowres'] = re.sub(r'\D', '', b.resolution)
        except Exception:
            # Best effort: one failing video must not abort the whole scan.
            # ``Exception`` (not a bare ``except``) keeps Ctrl-C working.
            print('in except')
    return df, v
# Load the validation split, keep a single label, look up the stream metadata
# for every remaining video and store the rows that could be resolved.
# NOTE(review): this section reads and writes under '.../MyDrive/Project/...'
# while the later data_create section uses '.../MyDrive/Projects/...' for the
# same try_1.pkl file - confirm which directory is the intended one.
path = '/content/drive/MyDrive/Project/action recognition/700_2020/validate.csv'
df = pd.read_csv(path)
df0 = df.copy()  # untouched copy of the full table
print(df.shape)
## taking only one label at a time
df = df[df['label'] == 'testifying'].copy()
print(df.shape)
df = df.reset_index(drop=True)
df.index.name = 'testifying'
df = df.drop(columns=['split', 'label'])
#df.head()
# The original called the undefined name ``final_1``; ``modifying_df`` is the
# function that returns the (df, v) pair and creates the columns used below.
df, v = modifying_df(df)
#print(v.shape)
print(df['highres'].unique())
print(df['frames'].unique())
print(df['lowres'].unique())
print(df.isnull().sum())
# Drop the videos whose metadata lookup failed.
df = df.dropna()
df.to_pickle('/content/drive/MyDrive/Project/action recognition/outputs/try_1.pkl')
"""#data_create"""
def pre(frame):
    """Return ``frame`` resized to 224x224 with ``cv2.resize``."""
    return cv2.resize(frame, (224, 224))
def select(video_frames, frames):
    """Return ``frames`` items from ``video_frames``, truncating or padding.

    A sequence longer than ``frames`` is cut to its first ``frames`` items.
    A shorter one is padded by repeating its last item.  The input is never
    modified: the original padded the caller's own list in place.

    Args:
        video_frames: sequence of decoded frames.
        frames: number of items wanted.

    Returns:
        A sequence of length ``frames`` (a new list whenever no truncation
        was needed).

    Raises:
        IndexError: if padding is required but ``video_frames`` is empty, so
            there is no last frame to repeat.
    """
    if len(video_frames) > frames:
        return video_frames[:frames]

    # Work on a copy so the padding below cannot leak into the caller's list.
    selected = list(video_frames)
    missing = frames - len(selected)
    if missing > 0:
        if not selected:
            raise IndexError('cannot pad an empty frame list')
        selected.extend([selected[-1]] * missing)
    return selected
def func(start, end):
    """Cut one clip out of the downloaded video and return its frames.

    The span ``start``..``end`` is extracted with ``ffmpeg_extract_subclip``
    into ``yt_croped.mp4``.  Every frame of that file is read with OpenCV,
    resized by ``pre`` and the list is brought to 300 frames by ``select``.

    Args:
        start: clip start, passed straight to ``ffmpeg_extract_subclip``.
        end: clip end, passed straight to ``ffmpeg_extract_subclip``.

    Returns:
        ``np.array`` built from the selected frames; its shape is printed.
    """
    frames = 300
    # NOTE(review): the input name 'yt_videomp4.mp4' differs from the
    # filename='yt_video.mp4' passed to download() in final(); presumably the
    # pytube version in use rewrites the name - confirm before changing.
    ffmpeg_extract_subclip("/content/yt_videomp4.mp4", start, end, targetname="yt_croped.mp4")
    cap = cv2.VideoCapture('/content/yt_croped.mp4')
    video_frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No more frames (or the file could not be read).
                break
            video_frames.append(pre(frame))
    finally:
        # Release the capture even when decoding or resizing fails.
        cap.release()
    v = select(video_frames, frames)
    if len(v) != frames:
        print('short')
    ar = np.array(v)
    print(ar.shape)
    return ar
def final(df):
    """Download every video listed in ``df`` and collect its clip frames.

    For each row the lowest-resolution progressive mp4 stream is downloaded
    with ``filename='yt_video.mp4'`` and the ``time_start``..``time_end`` span
    is turned into a frame array by ``func``.  A row that fails at any step is
    reported with ``' in except'`` and skipped.

    Args:
        df: DataFrame with ``youtube_id``, ``time_start`` and ``time_end``
            columns.

    Returns:
        ``np.array`` of the per-video arrays that were produced; an empty
        array when no video succeeded.
    """
    clips = []
    # ``.loc`` is label based, so walk the actual index labels.
    for i in df.index:
        youtube_video_url = 'https://www.youtube.com/watch?v=' + df.loc[i, 'youtube_id']
        print('\n', i)
        try:
            yt_obj = YouTube(youtube_video_url)
            filters = yt_obj.streams.filter(progressive=True, file_extension='mp4')
            start = df.loc[i, 'time_start']
            end = df.loc[i, 'time_end']
            filters.get_lowest_resolution().download(filename='yt_video.mp4')
            clips.append(func(start, end))
        except Exception:
            # Best effort per video; ``Exception`` instead of a bare
            # ``except`` so the long-running loop can still be interrupted.
            print('\n in except')
    return np.array(clips)
# Reload the filtered metadata table, build the clip array for every row and
# scale the values by the global maximum.
path = '/content/drive/MyDrive/Projects/action recognition/outputs/try_1.pkl'
df = pd.read_pickle(path).reset_index(drop=True)
print(df.shape)
#df.head(3)
vs = final(df)
v = vs / vs.max()
def saving(i, v, path_template='/content/drive/MyDrive/Projects/action recognition/outputs/try_2_{}.pkl'):
    """Pickle the ``i``-th batch of ten items of ``v`` to its own file.

    The batch is ``v[i*10:(i+1)*10]`` (``v`` is indexed with five axes) and
    the file name is ``path_template`` formatted with the 1-based batch
    number ``i + 1``.

    The original deleted the file handle and then called ``close()`` on it,
    which raised ``UnboundLocalError`` on every call; the ``with`` block
    already closes the file, so both statements are gone.

    Args:
        i: zero-based batch index.
        v: 5-dimensional array-like supporting ``v[a:b, :, :, :, :]``.
        path_template: output path containing one ``{}`` placeholder for the
            batch number; defaults to the original Drive location.
    """
    batch = v[i * 10:(i + 1) * 10, :, :, :, :]
    with open(path_template.format(i + 1), 'wb') as f:
        pickle.dump(batch, f)
    print('saved ', i + 1)
# Every complete batch of ten videos is written by saving().
for i in range(len(vs) // 10):
    saving(i, v)

# The videos left over after the complete batches go into one last, smaller
# file numbered right after them.
if len(vs) % 10:
    g = v[-(len(vs) % 10):, :, :, :, :]
    with open('/content/drive/MyDrive/Projects/action recognition/outputs/try_2_{}.pkl'.format(len(vs) // 10 + 1), 'wb') as f:
        pickle.dump(g, f)
    # The with-block has already closed the file; just drop the temporaries.
    del g, f
    print('saved ', len(vs) // 10 + 1)