-
Notifications
You must be signed in to change notification settings - Fork 80
/
Copy pathinspectHITs.py
177 lines (156 loc) · 6.77 KB
/
inspectHITs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# -------------------- inspectHITs.py -------------------------
# python inspectHITs.py --csvFileIn Batch_3211133_batch_results.csv \
# --imagePathRoot SJ7STAR_images --csvFileOut batch_results_checked.csv
# Use this application to review a downloaded MTurk csv results file
# and generate a modified csv results file with the accept and reject entries
# filled in. An 'x' in the accept column accepts the HIT, or any text in the reject
# column rejects the HIT
# Loops over all the HITs, displaying the images, bounding boxes and box labels
# Displays a text box where the response can be entered. If the entry is 'x'
# this is placed in the accept column, any other text is placed in the reject column
# Once complete, upload the new csv file to MTurk
# ---- Format of downloaded csv file ---
# The csv file that is retrieved from the first submission has "AssignmentStatus"
# assigned "Submitted"
# Every entry will have to be checked and "Approve" and "Reject" fields assigned appropriately
# When the csv file is returned from the second submission, it gets more complicated
# The "Reject" assignment is moved to "RequesterFeedback"
# There will be two entries for each "Input.image_url". One with "AssignmentStatus" equal
# to "Rejected" or "Accepted", and one with "AssignmentStatus" equal to "Submitted"
from imutils import paths
from PIL import Image, ImageDraw, ImageFont
import tkinter
from PIL import ImageTk
import argparse
import numpy as np
import os
import sys
import csv
import re
# Build the command line interface. All three options are mandatory, so they
# are declared from a single table of (short flag, long flag, help text).
ap = argparse.ArgumentParser()
for shortFlag, longFlag, helpText in (
        ("-l", "--csvFileIn", "path to labels input file"),
        ("-o", "--csvFileOut", "path to labels output file"),
        ("-i", "--imagePathRoot", "path to image files")):
    ap.add_argument(shortFlag, longFlag, required=True, help=helpText)
# Expose the parsed options as a plain dictionary keyed by the long option name
args = vars(ap.parse_args())
# Check arguments: the csv input file and the image root must both exist
# before any work is done. Exit with a non-zero status so that a calling
# shell or script can detect the failure (a bare sys.exit() reports success).
if not os.path.exists(args["csvFileIn"]):
    print("[ERROR]: --csvFileIn \"{}\" does not exist".format(args["csvFileIn"]))
    sys.exit(1)
if not os.path.exists(args["imagePathRoot"]):
    print("[ERROR]: --imagePathRoot \"{}\" does not exist".format(args["imagePathRoot"]))
    sys.exit(1)
# Open the csv input file; each row is read as a dictionary keyed by the
# column names found in the header line.
# newline='' is what the csv module requires so that line breaks embedded in
# quoted fields are parsed correctly.
# The file is intentionally left open here: csvReader reads rows lazily, so
# the handle must stay valid while the rows are being iterated.
csvFileIn = open(args["csvFileIn"], "r", newline='')
csvReader = csv.DictReader(csvFileIn)
# Handler for the return key
def return_key_exit_mainloop(event):
    """Stop the Tk main loop when the bound key event fires.

    event: the key event; quit() is called on the widget it carries, which
    makes the blocked mainloop() call return.
    """
    sourceWidget = event.widget
    sourceWidget.quit()
# Create the main window
root = tkinter.Tk()
# Pressing return hands control back to the script by ending the main loop
root.bind('<Return>', return_key_exit_mainloop)
# Initial geometry: position offset only (+100+100), no size
root.geometry('+%d+%d' % (100, 100))
# Process the csv output file
# csvImagesChecked maps each "Input.image_url" already present in the output
# file to the list of "AssignmentStatus" values recorded for it
csvImagesChecked = dict()
csvFileOutExists = os.path.exists(args["csvFileOut"])
if csvFileOutExists:
    print("[INFO] csv output file already exists. Appending data")
    # An existing but empty file has no header yet, so it still needs one
    writeHeader = os.path.getsize(args["csvFileOut"]) == 0
    # Read every entry of the existing output and remember its status; the
    # with-block guarantees the read handle is closed even if a row is bad
    with open(args["csvFileOut"], "r", newline='') as csvFileOut:
        csvOutReader = csv.DictReader(csvFileOut)
        for hitDict in csvOutReader:
            csvImagesChecked.setdefault(
                hitDict["Input.image_url"], []).append(hitDict["AssignmentStatus"])
else:
    print("[INFO] csv output file not found. Creating new file")
    writeHeader = True
# Open the csv output file in append mode (this creates it when missing) and
# write the header when the file holds no data yet. The column layout is
# copied from the csv input file.
csvWriteFile = open(args["csvFileOut"], 'a+', newline='')
csvWriter = csv.DictWriter(csvWriteFile, fieldnames=csvReader.fieldnames)
if writeHeader:
    csvWriter.writeheader()
# Loop over the lines in the csv file. Each line is read as a dictionary.
# For every HIT that still needs a decision, the image is shown with its
# bounding boxes and labels, the reviewer's answer is read from a text entry,
# and the (possibly modified) row is appended to the csv output file.

# The label font is identical for every HIT, so load it once. Fall back to
# Pillow's built-in font when the FreeMono font file is not installed.
try:
    font = ImageFont.truetype("/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf", 30)
except OSError:
    font = ImageFont.load_default()

for hitDict in csvReader:
    # Skip the HIT if a row with the same image and the same status has
    # already been copied to the csv output file. The flag is recomputed for
    # every row, and images that are not in the output file yet yield an
    # empty status list rather than None.
    localImagePath = hitDict["Input.image_url"]
    hitAssignmentStatus = csvImagesChecked.get(localImagePath, [])
    hitProcessedAlready = hitDict["AssignmentStatus"] in hitAssignmentStatus
    if hitProcessedAlready:
        continue

    # Rows that are not "Submitted" need no review: copy them through unchanged
    if hitDict["AssignmentStatus"] != "Submitted":
        csvWriter.writerow(hitDict)
        continue

    # Read the image
    imagePath = os.path.join(args["imagePathRoot"], hitDict["Input.image_url"])
    image = Image.open(imagePath)
    draw = ImageDraw.Draw(image)

    # Find the boxes, and draw them on the image. Every {...} group in the
    # annotation text is treated as one box made of key:value pairs.
    annotations = hitDict["Answer.annotation_data"]
    for match in re.finditer(r"\{(.*?)\}", annotations):
        locDict = dict()
        # loop over the box location data
        for loc in match.group().split(","):
            locSplit = loc.split(":")
            locDict[locSplit[0].strip("\"{}")] = locSplit[1].strip("\"{}")
        top = int(locDict["top"])
        left = int(locDict["left"])
        bottom = top + int(locDict["height"])
        right = left + int(locDict["width"])
        draw.rectangle((left, top, right, bottom), outline=128, fill=None)
        # Place the label just above the box, clamped to the top of the image
        draw.text((left, max(0, top - 30)), locDict["label"].upper(), font=font, fill="red")
    del draw

    # Resize the image to a fixed width, keeping the aspect ratio.
    # Image.LANCZOS is the same filter as the removed Image.ANTIALIAS alias.
    basewidth = 1000
    wpercent = basewidth / float(image.size[0])
    hsize = int(float(image.size[1]) * wpercent)
    image = image.resize((basewidth, hsize), Image.LANCZOS)

    # Add image, prompt text, and text Entry box to window, default text is 'x'
    root.geometry('%dx%d' % (image.size[0], image.size[1]))
    tkpi = ImageTk.PhotoImage(image)
    label_image = tkinter.Label(root, image=tkpi)
    label_image.place(x=0, y=0, width=image.size[0], height=image.size[1])
    label = tkinter.Label(root, text="Enter x to accept, or a reason to reject")
    label.pack()
    e = tkinter.Entry(root)
    e.insert(0, "x")
    e.pack()
    e.focus_set()
    root.title(imagePath)
    root.mainloop()  # wait until user presses 'return'

    # Get the accept/reject response, and write to the output csv file.
    # Surrounding whitespace is dropped so that a stray space around 'x'
    # does not turn an approval into a rejection.
    acceptReject = e.get().strip()
    if acceptReject in ('x', 'X'):
        hitDict["Approve"] = "x"
    else:
        hitDict["Reject"] = acceptReject
    csvWriter.writerow(hitDict)
    # Flush after every HIT so that an interrupted session loses no decisions
    csvWriteFile.flush()

    # Finished with the window contents, destroy them before the next HIT
    label_image.destroy()
    e.destroy()
    label.destroy()
    print(acceptReject)

# All rows handled: release both csv files
csvWriteFile.close()
csvFileIn.close()