-
Notifications
You must be signed in to change notification settings - Fork 13
/
mastif_parser.py
160 lines (124 loc) · 7.49 KB
/
mastif_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# Program to extract images and labels from the MASTIF dataset and convert them to darknet format.
import csv
import re
import os.path
from common_config import *
# TO CHANGE: machine-specific location of the MASTIF dataset.
# The value must end with "/" because INPUT_PATH below is built by plain string concatenation.
MASTIF_ROOT_PATH = "/media/angeliton/Backup1/DBs/Road Signs/MASTIF/"
# Scale factor applied to each image before it is written out; bounding boxes are rescaled to match.
RESIZE_PERCENTAGE = 0.9
# Passed to update_db_prefix() and prepended to every output filename produced by read_dataset().
DB_PREFIX = 'mastif-'
# Subfolders of INPUT_PATH that read_dataset() scans for annotations.
ANNOTATIONS_FOLDERS = ["TS2009", "TS2010", "TS2011"]
# Name of the annotation file expected inside each of the subfolders above.
ANNOTATIONS_FILENAME = "index.seq"
# Folder that contains the annotation subfolders (and the images they reference).
INPUT_PATH = MASTIF_ROOT_PATH + "input-img/"
def initialize_traffic_sign_classes():
    """Reset ``traffic_sign_classes`` and register the MASTIF sign codes.

    Every key has the form "<index>-<category name>" and maps to the list of
    MASTIF sign codes grouped under that category. The final category is built
    from OTHER_CLASS / OTHER_CLASS_NAME and is registered without any code.
    """
    traffic_sign_classes.clear()

    categories = (
        ("0-prohibitory", [
            "B03", "B05", "B06", "B07", "B08", "B09", "B10", "B11", "B12",
            "B13", "B14", "B15", "B16", "B17", "B18", "B19", "B20", "B21",
            "B22", "B23", "B24", "B25", "B26", "B27", "B28", "B29", "B30",
            "B31", "B32", "B33", "B34", "B35", "B36", "B37", "B38",
        ]),
        ("1-danger", [
            "A01", "A02", "A03", "A04", "A05", "A06", "A07", "A08", "A09",
            "A10", "A11", "A12", "A13", "A14", "A15", "A16", "A17", "A18",
            "A19", "A20", "A21", "A22", "A23", "A24", "A25", "A26", "A27",
            "A28", "A29", "A30", "A31", "A32", "A33", "A34", "A35", "A36",
            "A37", "A38", "A39", "A40", "A41", "A42", "A43", "A44", "A45",
            "A46",
        ]),
        ("2-mandatory", [
            "B44", "B45", "B46", "B47", "B48", "B49", "B50", "B51", "B52",
            "B53", "B54", "B55", "B56", "B57", "B58", "B59", "B60", "B61",
            "B62",
        ]),
        ("3-information", [
            "C01", "C02", "C03", "C05", "C06", "C10", "C29", "C30", "C31",
            "C32", "C33", "C34", "C35", "C36", "C37", "C38", "C39", "C40",
            "C41", "C42", "C43", "C44", "C45", "C46", "C47", "C48", "C49",
            "C50", "C51", "C52", "C53", "C54", "C55", "C56", "C57", "C58",
            "C59", "C60", "C61", "C62", "C63", "C64", "C65", "C68", "C69",
            "C70", "C71", "C72", "C73", "C75", "C77", "C86", "C88", "C89",
            "C90", "C91", "C92", "C93", "C96",
        ]),
        ("4-stop", ["B02"]),
        ("5-yield", ["B01"]),
        ("6-noentry", ["B04"]),
        # Catch-all category: undefined, other, redbluecircles, diamonds.
        (str(OTHER_CLASS) + "-" + OTHER_CLASS_NAME, []),
    )

    # Assign one by one so the insertion order matches the table above.
    for category_key, sign_codes in categories:
        traffic_sign_classes[category_key] = sign_codes
# It depends on the row format
def calculate_darknet_format(input_img, row):
    """Build the darknet label for the first annotation stored in ``row``.

    ``row[1]`` is read as the sign class and ``row[2]`` .. ``row[5]`` as the
    x, y, width and height of the bounding box. The box is rescaled to the
    image size obtained after applying RESIZE_PERCENTAGE.

    Returns the value produced by ``parse_darknet_format`` for the adjusted
    class, the resized image dimensions and the scaled box corners.
    """
    original_width, original_height = get_img_dim_plt(input_img)
    scaled_width = int(original_width * RESIZE_PERCENTAGE)
    scaled_height = int(original_height * RESIZE_PERCENTAGE)

    # Ratio between the original and the resized dimension on each axis.
    x_ratio = original_width / scaled_width
    y_ratio = original_height / scaled_height

    # Columns 2..5 hold x, y, w, h; horizontal values use the x ratio and
    # vertical values use the y ratio.
    left, top, box_width, box_height = [
        float(row[column]) / ratio
        for column, ratio in ((2, x_ratio), (3, y_ratio), (4, x_ratio), (5, y_ratio))
    ]

    adjusted_class = adjust_object_class(row[1])  # Map the sign code to its category

    if SHOW_IMG:
        preview = resize_img_plt(input_img, scaled_width, scaled_height)
        show_img(preview, left, top, box_width, box_height)

    return parse_darknet_format(
        adjusted_class,
        scaled_width,
        scaled_height,
        left,
        top,
        left + box_width,
        top + box_height,
    )
def add_file_to_dir(row, subfolder_name, img_labels):
    """Record the labels of one annotation row in ``img_labels``.

    ``row`` holds the image filename followed by zero or more groups of five
    values (sign class, x, y, w, h). Rows whose image does not exist under
    ``INPUT_PATH + subfolder_name`` are ignored.

    ``img_labels`` is updated in place: the entry keyed
    ``"<subfolder_name>-<filename>"`` is a list whose first element is the
    image path, followed by one darknet label per annotation that does not
    belong to OTHER_CLASS. ``row`` itself is not modified.
    """
    filename = row[0]
    file_path = INPUT_PATH + subfolder_name + "/" + filename
    if not os.path.isfile(file_path):
        return

    # Look the entry up under the same key it is stored with; testing the bare
    # filename would never match and would reset labels gathered earlier.
    label_key = subfolder_name + "-" + filename
    labels = img_labels.setdefault(label_key, [file_path])

    # Only complete groups of five values are labels; stray trailing tokens
    # (for example the newline left by a line without annotations) are skipped.
    group_starts = range(1, len(row) - 4, 5)
    if not group_starts:
        return

    # The image is the same for every label of the row, so read it once.
    input_img = read_img_plt(file_path)
    for start in group_starts:
        label_row = [filename] + row[start:start + 5]
        darknet_label = calculate_darknet_format(input_img, label_row)
        object_class_adjusted = int(darknet_label.split()[0])
        if object_class_adjusted != OTHER_CLASS:  # Keep only useful labels
            labels.append(darknet_label)
def update_global_variables(train_pct, test_pct, color_mode, verbose, false_data, output_img_ext):
    """Rebind this module's configuration globals from the given arguments.

    Sets TRAIN_PROB, TEST_PROB, COLOR_MODE, SHOW_IMG, ADD_FALSE_DATA and
    OUTPUT_IMG_EXTENSION in this module's namespace.

    NOTE(review): these names presumably arrive through
    ``from common_config import *``; rebinding them here does not change the
    copies held by ``common_config`` itself - confirm that is intended.
    """
    globals().update({
        "TRAIN_PROB": train_pct,
        "TEST_PROB": test_pct,
        "COLOR_MODE": color_mode,
        "SHOW_IMG": verbose,
        "ADD_FALSE_DATA": false_data,
        "OUTPUT_IMG_EXTENSION": output_img_ext,
    })
def _parse_annotation_line(line):
    """Split one annotation line into a flat list of tokens.

    Brackets and parentheses are dropped, the ``x=``/``y=``/``w=``/``h=``
    prefixes are removed and the ``: & @ ,`` separators become spaces. The
    result is split on any whitespace, so it never contains empty tokens or a
    trailing newline; a blank line yields an empty list.
    """
    line = re.sub(r"[\[\]\(\)]", "", line)
    line = re.sub(r"[xywh]=", "", line)
    line = re.sub(r"[:&@,]", " ", line)
    return line.split()


def read_dataset(output_train_text_path, output_test_text_path, output_train_dir_path, output_test_dir_path):
    """Convert the MASTIF annotations into train and test darknet data.

    Every annotation file found under ``INPUT_PATH`` (one per entry of
    ANNOTATIONS_FOLDERS) is parsed. Images that end up without labels are
    handed to ``add_false_negatives`` when ADD_FALSE_DATA is set; every
    labelled image is resized and written to the train or the test output,
    chosen at random with weights TRAIN_PROB / TEST_PROB.

    Parameters:
        output_train_text_path: text file opened in append mode for train entries.
        output_test_text_path: text file opened in append mode for test entries.
        output_train_dir_path: directory passed to the writers for train data.
        output_test_dir_path: directory passed to the writers for test data.

    Returns:
        The tuple ``(classes_counter_train, classes_counter_test)``.
    """
    img_labels = {}  # "<subfolder>-<filename>" -> [image path, label, label, ...]

    update_db_prefix(DB_PREFIX)
    initialize_traffic_sign_classes()
    initialize_classes_counter()

    # Context managers make sure both output files are closed even if parsing
    # or writing fails half-way through.
    with open(output_train_text_path, "a+") as train_text_file, \
            open(output_test_text_path, "a+") as test_text_file:

        # Loop over the dataset subfolders
        for subfolder_name in ANNOTATIONS_FOLDERS:
            subfolder = INPUT_PATH + subfolder_name
            annotation_path = subfolder + "/" + ANNOTATIONS_FILENAME

            if not os.path.exists(annotation_path):
                print("Subfolder " + subfolder + " not found")
                continue

            with open(annotation_path, "r") as annotation_file:
                for line in annotation_file:
                    row = _parse_annotation_line(line)
                    if row:  # Blank lines carry no filename
                        add_file_to_dir(row, subfolder_name, img_labels)

        # COUNT FALSE NEGATIVES (IMG WITHOUT LABELS)
        total_false_negatives_dir = {}
        total_annotated_images_dir = {}
        for filename, img_label_subset in img_labels.items():
            # The first element is always the image path; a lone path means
            # the image has no usable label.
            if len(img_label_subset) == 1:
                total_false_negatives_dir[filename] = img_label_subset
            else:
                total_annotated_images_dir[filename] = img_label_subset

        # CALCULATE MAXIMUM FALSE NEGATIVES TO ADD
        total_annotated_images = len(total_annotated_images_dir)
        total_false_negatives = len(total_false_negatives_dir)
        max_false_data = round(total_annotated_images * TRAIN_PROB)  # False data: False negative + background

        print("TOTAL ANNOTATED IMAGES: " + str(total_annotated_images))
        print("TOTAL FALSE NEGATIVES: " + str(total_false_negatives))
        print("MAX FALSE DATA: " + str(max_false_data))

        # ADD FALSE IMAGES TO TRAIN
        if total_false_negatives > max_false_data:
            total_false_negatives = max_false_data

        if ADD_FALSE_DATA:
            add_false_negatives(total_false_negatives, total_false_negatives_dir, output_train_dir_path, train_text_file)

        # SET ANNOTATED IMAGES IN TRAIN OR TEST DIRECTORIES
        for filename, img_label_subset in total_annotated_images_dir.items():
            input_img = read_img(img_label_subset[0])
            input_img = resize_img_percentage(input_img, RESIZE_PERCENTAGE)
            input_img_labels = img_label_subset[1:]

            # Weighted coin flip: True sends the image to train, False to test.
            train_file = rand.choices([True, False], [TRAIN_PROB, TEST_PROB])[0]
            output_filename = DB_PREFIX + filename

            if train_file:
                write_data(output_filename, input_img, input_img_labels, train_text_file, output_train_dir_path, train_file)
            else:
                write_data(output_filename, input_img, input_img_labels, test_text_file, output_test_dir_path, train_file)

    return classes_counter_train, classes_counter_test
# read_dataset()