-
Notifications
You must be signed in to change notification settings - Fork 93
/
gen_dataset_lists.py
107 lines (88 loc) · 4.58 KB
/
gen_dataset_lists.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# processing the raw data of the video datasets (something-something and jester)
# generate the meta files:
# category.txt: the list of categories.
# train_videofolder.txt: each row contains [video_folder_path num_frames classIDX]
# val_videofolder.txt: same format as above
#
# Created by: Can Zhang
# github: @zhang-can, May 27th, 2018
#
# Command-line interface: the dataset name and the frame root are required
# positionals; everything else has a default matching the project layout.
import argparse
import os

parser = argparse.ArgumentParser(
    description="Generate category/train/val list files for video datasets.")
parser.add_argument('dataset', type=str,
                    choices=['something', 'jester', 'ucf101', 'hmdb51',
                             'activitynet_1.2', 'activitynet_1.3'])
parser.add_argument('frame_path', type=str,
                    help="root directory holding the frames")
parser.add_argument('--labels_path', type=str, default='data/dataset_labels/',
                    help="root directory holding the 20bn csv files: labels, train & validation")
parser.add_argument('--out_list_path', type=str, default='data/')
parser.add_argument('--rgb_prefix', type=str, default='img_',
                    help="prefix of RGB frames")
parser.add_argument('--flow_x_prefix', type=str, default='flow_x',
                    help="prefix of x direction flow images")
parser.add_argument('--flow_y_prefix', type=str, default='flow_y',
                    help="prefix of y direction flow images")
parser.add_argument('--num_split', type=int, default=3,
                    help="number of train/test splits to process")
# BUG FIX: the original '--shuffle' flag was a no-op — action='store_true'
# combined with default=True means args.shuffle is True whether or not the
# flag is passed.  Keep the default (shuffle on) for backward compatibility,
# and add '--no_shuffle' so shuffling can actually be disabled.
parser.add_argument('--shuffle', dest='shuffle', action='store_true',
                    default=True, help="shuffle the generated lists (default)")
parser.add_argument('--no_shuffle', dest='shuffle', action='store_false',
                    help="do not shuffle the generated lists")
args = parser.parse_args()

# Frequently-used arguments unpacked to short module-level names.
dataset = args.dataset
labels_path = args.labels_path
frame_path = args.frame_path
if dataset == 'something':
    # Something-Something-V1 ships three semicolon-delimited csv files:
    # labels (one category per line), train and validation (video;category).
    dataset_name = 'something-something-v1'
    print('\nProcessing dataset: {}\n'.format(dataset))

    # --- category list --------------------------------------------------
    print('- Generating {}_category.txt ......'.format(dataset))
    with open(os.path.join(labels_path, '{}-labels.csv'.format(dataset_name))) as f:
        # Sort so the category -> class-index assignment is deterministic.
        categories = sorted(line.rstrip() for line in f)
    # Use a context manager so the handle is closed deterministically
    # (the original bare open(...).write(...) leaked it).
    with open(os.path.join(args.out_list_path, '{}_category.txt'.format(dataset)), 'w') as f:
        f.write('\n'.join(categories))
    print('- Saved as:', os.path.join(args.out_list_path, '{}_category.txt!\n'.format(dataset)))

    # Map category name -> class index (position in the sorted list).
    dict_categories = {category: i for i, category in enumerate(categories)}

    # --- train / val lists ----------------------------------------------
    files_input = ['{}-validation.csv'.format(dataset_name), '{}-train.csv'.format(dataset_name)]
    files_output = ['{}_val.txt'.format(dataset), '{}_train.txt'.format(dataset)]
    for filename_input, filename_output in zip(files_input, files_output):
        folders = []
        idx_categories = []
        with open(os.path.join(labels_path, filename_input)) as f:
            for line in f:
                # Each row is "<video-folder>;<category name>".
                items = line.rstrip().split(';')
                folders.append(items[0])
                # Plain str() — the original wrapped this in a pointless
                # single-argument os.path.join.
                idx_categories.append(str(dict_categories[items[1]]))
        output = []
        for i, (cur_folder, cur_idx) in enumerate(zip(folders, idx_categories)):
            # Frame count of a video = number of files in its frame folder.
            dir_files = os.listdir(os.path.join(frame_path, cur_folder))
            output.append('{} {} {}'.format(os.path.join(frame_path, cur_folder), len(dir_files), cur_idx))
            if i % 1000 == 0:
                print('- Generating {} ({}/{})'.format(filename_output, i, len(folders)))
        with open(os.path.join(args.out_list_path, filename_output), 'w') as f:
            f.write('\n'.join(output))
        print('- Saved as:', os.path.join(args.out_list_path, '{}!\n'.format(filename_output)))
elif dataset == 'ucf101':
import sys
from pyActionRecog import parse_directory, build_split_list
from pyActionRecog import parse_split_file
rgb_p = args.rgb_prefix
flow_x_p = args.flow_x_prefix
flow_y_p = args.flow_y_prefix
num_split = args.num_split
out_path = args.out_list_path
shuffle = args.shuffle
# operation
print('\nProcessing dataset {}:\n'.format(dataset))
split_tp = parse_split_file(dataset)
f_info = parse_directory(frame_path, rgb_p, flow_x_p, flow_y_p)
print('- Writing list files for training/testing')
for i in range(max(num_split, len(split_tp))):
lists = build_split_list(split_tp, f_info, i, shuffle)
open(os.path.join(out_path, '{}_rgb_train_split_{}.txt'.format(dataset, i+1)), 'w').writelines(lists[0][0])
open(os.path.join(out_path, '{}_rgb_val_split_{}.txt'.format(dataset, i+1)), 'w').writelines(lists[0][1])
print('- List files successfully saved to "data/" folder!\n')
else:
print('"{}" dataset have not been tested yet!'.format(dataset))