-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdeepscribe.py
264 lines (223 loc) · 11.2 KB
/
deepscribe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
import argparse
import json
import os
import unicodedata
from glob import glob, iglob

import cv2
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm

from disp_multiple_images import show_images
from symbol_image import Symbol_Image
# Readings that are skipped when images are loaded or counted. The entries are
# stored upper-cased because names parsed from file names are upper-cased
# before they are compared against this list.
excluded_readings = [
    reading.upper()
    for reading in (
        'b', 'Epigraphic unit', 'g', 'l', 'n', 'p', 'r', 's', 't', 'x', 'y',
        'x x', '[x](+)⸢x⸣', '¦', 'š', 'ʾ', 'ḥ',
    )
]
class DeepScribe:
    """
    Loads, transforms, and displays OCHRE dataset images from an unzipped archive.

    Every method is static; the class only serves as a namespace. Image files
    are looked up as ``<directory>/<reading>_<uuid>.jpg``, where the directory
    and the optional reading filter come from the command line.
    """

    @staticmethod
    def get_command_line_args():
        """
        Receives command line arguments specifying what images to use.

        Returns:
            args (argparse object): Object storing each argument with its corresponding command line input.
                All values are left as strings (or None when not given); ``limit`` defaults to 'max'.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('-s', '--symbol', help='symbol query')
        # no limit should be input as 'max'
        parser.add_argument('-l',
                            '--limit',
                            help='limit on number of image results',
                            default='max')
        parser.add_argument('-d', '--directory', help='image directory')
        return parser.parse_args()

    @staticmethod
    def _parse_limit(limit):
        """
        Converts the raw --limit value into an int, or None when it is 'max' (no limit).

        Raises:
            ValueError: If the value is neither 'max' nor an integer.
        """
        if limit == 'max':
            return None
        try:
            return int(limit)
        except (TypeError, ValueError) as err:
            raise ValueError(
                "--limit must be an integer or 'max', got %r" % (limit,)) from err

    @staticmethod
    def _split_filename(path):
        """
        Splits an image path into its (symbol name, uuid) parts.

        The base name is expected to look like ``<name>_<uuid>.jpg``; the first
        "_" marks the start of the uuid. The name is upper-cased and stripped of
        surrounding spaces and guillemets.
        """
        # Work on the NFC-normalized base name only, so that neither the form
        # of the directory argument (trailing slash, separators) nor
        # normalization of the directory part can shift the split position.
        base = unicodedata.normalize('NFC', os.path.basename(path))
        name, _, rest = base.partition("_")
        uuid = os.path.splitext(rest)[0]
        return name.upper().strip(' »«'), uuid

    @staticmethod
    def _iter_symbol_files(args):
        """
        Yields (path, name, uuid) for every matching image whose reading is not excluded.

        Stops once the number of yielded images reaches the --limit value. The
        limit is checked after each yielded image, so at least one image is
        produced whenever any file matches.
        """
        limit = DeepScribe._parse_limit(args.limit)
        if args.symbol is None:
            symb_query = "*"
        else:
            symb_query = unicodedata.normalize('NFC', args.symbol)
        query = args.directory + "/" + symb_query + "_*.jpg"

        yielded = 0
        for path in iglob(query):
            name, uuid = DeepScribe._split_filename(path)
            if name in excluded_readings:
                continue
            # The path is yielded exactly as returned by glob (not normalized)
            # so that it can still be opened on byte-exact file systems.
            yield path, name, uuid
            yielded += 1
            if limit is not None and yielded >= limit:
                break

    # TODO: maybe change symbol_dict to a return value instead of modifying the parameter
    @staticmethod
    def load_images(symbol_dict):
        """
        Loads images based on command line arguments and stores them as Symbol_Image objects in a dictionary.

        Images are decoded with cv2.IMREAD_UNCHANGED. Files whose reading is in
        ``excluded_readings`` are skipped without being decoded. The
        -d/--directory argument must be supplied.

        Parameters:
            symbol_dict (dict): Dictionary to store loaded image data as
                symbol name : list of Symbol_Image objects pairs. Modified in place.
        """
        args = DeepScribe.get_command_line_args()
        matches = DeepScribe._iter_symbol_files(args)
        for path, name, uuid in tqdm(matches, desc='filenames'):
            # not using cv2.imread() in order to read unicode filenames
            img = cv2.imdecode(np.fromfile(path, dtype=np.uint8),
                               cv2.IMREAD_UNCHANGED)
            symbol_dict.setdefault(name, []).append(
                Symbol_Image(name, uuid, img))

    @staticmethod
    def display_images(symbol_dict, color=False):
        """
        Takes dictionary of loaded image data and hands all images to show_images for display.

        Parameters:
            symbol_dict (dict): Dictionary of loaded image data.
            color (boolean): Forwarded to show_images; presumably displays in color if True,
                otherwise in grayscale (confirm against disp_multiple_images).
        """
        images = [symb_img.img
                  for symbol_images in symbol_dict.values()
                  for symb_img in symbol_images]
        show_images(images, color=color)

    @staticmethod
    def _pad_and_resize(img, size):
        """
        Pads img with black borders to a centered square, then resizes it to size x size.
        """
        height, width = img.shape[:2]
        side = max(height, width)
        delta_h = side - height
        delta_w = side - width
        # Any odd leftover pixel goes to the bottom / right border.
        top, left = delta_h // 2, delta_w // 2
        bottom, right = delta_h - top, delta_w - left
        padded = cv2.copyMakeBorder(img,
                                    top,
                                    bottom,
                                    left,
                                    right,
                                    cv2.BORDER_CONSTANT,
                                    value=[0, 0, 0])
        return cv2.resize(padded, (size, size))

    # every numeric / list option uses -1 to mean "do not apply this step"
    @staticmethod
    def transform_images(symbol_dict,
                         gray=True,
                         gauss_filter=-1,
                         bilat_filter=-1,
                         global_thresh=-1,
                         adapt_thresh_mean=-1,
                         adapt_thresh_gauss=-1,
                         otsus=-1,
                         laplacian=False,
                         canny=-1,
                         rescale_global_mean=False,
                         resize=-1):
        """
        Takes dictionary of loaded image data, transforms all images according to the parameters, and saves them back in the dictionary.

        The enabled steps are applied to each image in the order the parameters are listed.

        Parameters:
            symbol_dict (dict): Dictionary of loaded image data.
            gray (boolean): Converts images to grayscale if True, otherwise leaves them unchanged.
                Images that are already 2-dimensional (single channel) are not converted again.
            gauss_filter (int): Kernel size of gaussian filter. If -1, do not apply filter.
            bilat_filter (list): List of length 3 of cv2.bilateralFilter parameters. If -1, do not apply filter.
            global_thresh (int): Global threshold value. If -1, do not apply threshold.
            adapt_thresh_mean (int): Block size of adaptive mean threshold. If -1, do not apply threshold.
            adapt_thresh_gauss (int): Block size of adaptive gaussian threshold. If -1, do not apply threshold.
            otsus (int): Value passed as the threshold argument together with cv2.THRESH_OTSU.
                If -1, do not apply threshold.
            laplacian (boolean): Applies Laplacian operator if True, otherwise does not apply operator.
            canny (list): List of length 2 of cv2.Canny parameters. If -1, do not apply canny edge detection.
            rescale_global_mean (boolean): If True, divides the pixel values by 255 and subtracts
                the mean of the scaled image (computed per image).
            resize (int): Target width and height of image to be resized to. If -1, do not resize image.
        """
        for symbol_images in symbol_dict.values():
            for symb_img in symbol_images:
                img = symb_img.img

                # BGR2GRAY rejects single-channel input, so only convert
                # images that still carry a channel axis.
                if gray and img.ndim == 3:
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

                if gauss_filter != -1:
                    img = cv2.GaussianBlur(img,
                                           (gauss_filter, gauss_filter),
                                           0)

                if bilat_filter != -1:
                    # The result must be assigned back; it was previously
                    # computed and then thrown away.
                    img = cv2.bilateralFilter(img,
                                              bilat_filter[0],
                                              bilat_filter[1],
                                              bilat_filter[2])

                if global_thresh != -1:
                    _, img = cv2.threshold(img, global_thresh, 255,
                                           cv2.THRESH_BINARY)

                if adapt_thresh_mean != -1:
                    img = cv2.adaptiveThreshold(img, 255,
                                                cv2.ADAPTIVE_THRESH_MEAN_C,
                                                cv2.THRESH_BINARY,
                                                adapt_thresh_mean, 2)

                if adapt_thresh_gauss != -1:
                    img = cv2.adaptiveThreshold(img, 255,
                                                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                                cv2.THRESH_BINARY,
                                                adapt_thresh_gauss, 2)

                if otsus != -1:
                    # NOTE(review): per the OpenCV docs the threshold is chosen
                    # automatically when THRESH_OTSU is set, so the value of
                    # `otsus` likely only acts as an on/off switch - confirm.
                    _, img = cv2.threshold(
                        img, otsus, 255,
                        cv2.THRESH_BINARY + cv2.THRESH_OTSU)

                if laplacian:
                    img = cv2.Laplacian(img, cv2.CV_64F)

                if canny != -1:
                    img = cv2.Canny(img, canny[0], canny[1])

                # TODO: is normalizing before resizing correct?
                if rescale_global_mean:
                    scaled = img / 255.0
                    img = scaled - np.mean(scaled)

                if resize != -1:
                    img = DeepScribe._pad_and_resize(img, resize)

                symb_img.img = img

    @staticmethod
    def count_symbols(sort_by_alpha=False, sort_by_freq=False):
        """
        Count the number of different symbols represented by the images selected by the command line arguments.

        Only file names are inspected; no image is decoded. Readings listed in
        ``excluded_readings`` are ignored. The -d/--directory argument must be supplied.

        Parameters:
            sort_by_alpha (boolean): Print out the symbol name : frequency pairs in alphabetical order,
                followed by the number of unique signs and the number of images.
            sort_by_freq (boolean): Print out the same report in ascending order of frequency.

        Returns:
            A list of different symbol names that were represented by the selected images,
            in the order they were first encountered.
        """
        args = DeepScribe.get_command_line_args()
        symbol_counts = {}
        for _, name, _ in DeepScribe._iter_symbol_files(args):
            symbol_counts[name] = symbol_counts.get(name, 0) + 1

        total = sum(symbol_counts.values())

        if sort_by_alpha:
            for name in sorted(symbol_counts):
                print(name, ":", symbol_counts[name])
            print("Number of unique signs:", len(symbol_counts))
            print("Number of images:", total)

        if sort_by_freq:
            by_frequency = sorted(symbol_counts.items(),
                                  key=lambda item: item[1])
            for name, frequency in by_frequency:
                print(name, ":", frequency)
            print("Number of unique signs:", len(symbol_counts))
            print("Number of images:", total)

        return list(symbol_counts.keys())