-
Notifications
You must be signed in to change notification settings - Fork 0
/
train_valid_split_all.py
96 lines (83 loc) · 3.64 KB
/
train_valid_split_all.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from genericpath import exists
import glob
import argparse
import math
import random
import os
import shutil
from tqdm import tqdm
# Command-line interface. Building the parser has no side effects, so it stays
# at module level; the actual argv parsing happens inside main() so that
# importing this module never touches sys.argv.
parser = argparse.ArgumentParser(
    description='Split the all_train / all_train_no_obj samples into '
                'train and val subsets.')
parser.add_argument('--data-root', type=str,
                    default='/eva_data/zchin/rsna_data_all',
                    help='dataset root containing images/ and annotations/')
parser.add_argument('--ratio', type=float, default=0.2,
                    help='validation data ratio')
parser.add_argument('--seed', type=int, default=None,
                    help='random seed for a reproducible split '
                         '(default: unseeded)')

# Source sub-directories (under images/ and annotations/) that get split:
# first the samples that have objects, then the ones that do not.
SOURCE_SUBSETS = ('all_train', 'all_train_no_obj')


def _list_pngs(directory):
    """Return the sorted paths of the ``*.png`` files directly in *directory*.

    A missing directory yields an empty list.
    """
    # glob.glob1 is an undocumented helper (deprecated in Python 3.13), so the
    # public glob.glob is used instead. glob.escape keeps any metacharacters
    # in the directory name literal.
    pattern = os.path.join(glob.escape(directory), '*.png')
    return sorted(glob.glob(pattern))


def plan_split(data_root, subset, ratio, rng=random):
    """Decide where every image/label pair of one source subset is copied.

    Nothing is copied here; only the directory listing is read.

    Args:
        data_root: Dataset root containing ``images/`` and ``annotations/``.
        subset: Name of the source sub-directory, e.g. ``'all_train'``.
        ratio: Fraction of the images assigned to ``val``; the number of
            validation images is ``floor(n * ratio)``.
        rng: Object providing ``sample`` (the ``random`` module or a
            ``random.Random`` instance).

    Returns:
        A list of ``(src_img, dest_img, src_label, dest_label)`` tuples, one
        per PNG found in ``images/<subset>``, ordered by sorted source path.

    Raises:
        ValueError: If *ratio* is outside ``[0, 1]``.
    """
    if not 0 <= ratio <= 1:
        raise ValueError(f'ratio must be within [0, 1], got {ratio}')

    img_root = os.path.join(data_root, 'images')
    label_root = os.path.join(data_root, 'annotations')
    img_paths = _list_pngs(os.path.join(img_root, subset))
    valid_size = math.floor(len(img_paths) * ratio)
    # A set gives an O(1) membership test per image.
    valid_imgs = set(rng.sample(img_paths, valid_size))

    jobs = []
    for src_img in img_paths:
        split = 'val' if src_img in valid_imgs else 'train'
        img_name = os.path.basename(src_img)
        label_name = os.path.splitext(img_name)[0] + '.txt'
        # Every path is assembled from its components. Substring replacement
        # on the full path would also rewrite 'images', 'png' or 'all_train'
        # wherever they occur in the data root or in a file name.
        jobs.append((
            src_img,
            os.path.join(img_root, split, img_name),
            os.path.join(label_root, subset, label_name),
            os.path.join(label_root, split, label_name),
        ))
    return jobs


def main(argv=None):
    """Parse the command line and copy every sample into train/ or val/.

    Each image is copied together with its ``.txt`` label; a missing label
    file makes the copy raise ``FileNotFoundError``.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = parser.parse_args(argv)
    rng = random.Random(args.seed) if args.seed is not None else random

    for kind in ('images', 'annotations'):
        for split in ('train', 'val'):
            os.makedirs(os.path.join(args.data_root, kind, split),
                        exist_ok=True)

    for subset in SOURCE_SUBSETS:
        pbar = tqdm(plan_split(args.data_root, subset, args.ratio, rng))
        for src_img, dest_img, src_label, dest_label in pbar:
            pbar.set_description(src_img)
            shutil.copy(src_img, dest_img)
            shutil.copyfile(src_label, dest_label)

    train_size = len(_list_pngs(os.path.join(args.data_root, 'images', 'train')))
    valid_size = len(_list_pngs(os.path.join(args.data_root, 'images', 'val')))
    print(f'train size: {train_size}\tvalid size: {valid_size}')


if __name__ == '__main__':
    main()