-
Notifications
You must be signed in to change notification settings - Fork 35
/
preprocess_eyepacs.py
68 lines (52 loc) · 2.13 KB
/
preprocess_eyepacs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import argparse
import csv
import sys
from shutil import rmtree
from PIL import Image
from glob import glob
from os import makedirs, rename
from os.path import join, splitext, basename, exists
from lib.preprocess import resize_and_center_fundus
# Command-line interface: the only option is the location of the data set.
parser = argparse.ArgumentParser(description='Preprocess EyePACS data set.')
parser.add_argument(
    "--data_dir",
    default="data/eyepacs",
    help="Directory where EyePACS resides.",
)
args = parser.parse_args()
data_dir = str(args.data_dir)

# Paths of the two label CSV files, both expected directly inside data_dir.
train_labels, test_labels = (
    join(data_dir, csv_name)
    for csv_name in ('trainLabels.csv', 'testLabels.csv')
)
# Create one output directory per grade (0-4) inside data_dir, skipping any
# that already exist. A plain loop is used because the work is purely a side
# effect; a list comprehension would only build a throwaway list of None.
for grade_label in range(5):
    grade_dir = join(data_dir, str(grade_label))
    if not exists(grade_dir):
        makedirs(grade_dir)

# Create a tmp directory for saving temporary preprocessing files. An
# existing tmp directory is removed first so the run starts from an empty one.
tmp_path = join(data_dir, 'tmp')
if exists(tmp_path):
    rmtree(tmp_path)
makedirs(tmp_path)
# Names of the images that could not be located or preprocessed.
failed_images = []

for labels in [train_labels, test_labels]:
    with open(labels, 'r') as f:
        reader = csv.reader(f, delimiter=',')
        # Skip the header row.
        next(reader)

        for i, row in enumerate(reader):
            # Skip blank or truncated rows (e.g. a trailing empty line)
            # instead of crashing on the two-value unpack below.
            if len(row) < 2:
                continue

            # Named `image_name` rather than `basename` so that the function
            # imported from os.path is not shadowed.
            image_name, grade = row[:2]

            # The label file stores the name without extension, so look the
            # file up by prefix. glob returns an empty list when nothing
            # matches; that case is recorded as a failure rather than
            # raising IndexError and aborting the whole run.
            matches = glob(join(data_dir, "{}*".format(image_name)))
            if matches:
                # Find contour of eye fundus in image, and scale
                # diameter of fundus to 299 pixels and crop the edges.
                res = resize_and_center_fundus(save_path=tmp_path,
                                               image_path=matches[0],
                                               diameter=299, verbosity=0)
                # Any result other than 1 is treated as a failure.
                failed = res != 1
            else:
                failed = True

            # Status message, rewritten in place via the carriage return.
            msg = "\r- Preprocessing image: {0:>7}".format(i+1)
            sys.stdout.write(msg)
            sys.stdout.flush()

            if failed:
                failed_images.append(image_name)
                continue

            new_filename = "{0}.jpg".format(image_name)

            # Move the file from the tmp folder to the right grade folder.
            rename(join(tmp_path, new_filename),
                   join(data_dir, str(int(grade)), new_filename))

# The progress line above never emits a newline; terminate it so the summary
# starts on its own line instead of being appended to the counter.
sys.stdout.write("\n")

# Clean tmp folder.
rmtree(tmp_path)

print("Could not preprocess {} images.".format(len(failed_images)))
print(", ".join(failed_images))