-
Notifications
You must be signed in to change notification settings - Fork 5
/
FashionDataset.py
213 lines (166 loc) · 7.34 KB
/
FashionDataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
from mrcnn import utils
import pandas as pd
import pickle
class FashionDataset(utils.Dataset):
    """
    Implements mrcnn.utils.Dataset.

    FashionDataset holds data relevant to the iMaterialist challenge:
    category definitions parsed from label_descriptions.json, per-image
    metadata, and instance masks stored as uncompressed RLE.
    """

    def __init__(self):
        super(FashionDataset, self).__init__()
        # Filled in by utils.Dataset.add_class()/prepare(); kept here so the
        # attribute exists before prepare() is called.
        self.class_names = []

    @staticmethod
    def load(filepath: str):
        """
        Unpickle and return a FashionDataset previously written by save().

        NOTE(review): pickle.load can execute arbitrary code -- only load
        files produced by this class's save().
        """
        with open(filepath, 'rb') as f:
            data = pickle.load(f)
        return data

    def save(self, filepath: str):
        """
        Pickle this dataset object to filepath.

        Bug fix: the original opened an undefined name ``save_file`` and
        passed the path string (instead of the file object) to pickle.dump,
        so the method always raised.
        """
        with open(filepath, 'wb') as f:
            pickle.dump(self, f)

    def create_classes(self, cat_file: str) -> list:
        """
        Added to FashionDataset.
        Initialize the classes.
        param:cat_file - filepath to fashion dataset's label_descriptions.json file
        Returns the list of category dicts that were registered.
        """
        # read and parse the labels file in one step
        with open(cat_file, 'r') as data_file:
            labels = json.load(data_file)
        categories = labels.get('categories')
        df_categories = pd.DataFrame(categories)
        df_categories['source'] = "imaterialist"
        dict_categories = [dict(row) for _, row in df_categories.iterrows()]
        for c in dict_categories:
            self.add_class(c['source'], c['id'] + 1, c['name'])  # add 1 to make room for background
        print("{} classes added.".format(len(dict_categories)))
        return dict_categories

    def create_anns(self, sub_df_images: pd.DataFrame) -> list:
        """
        Creates the 'annotations' entries for one image's image_info entry.
        Each entry has keys:
        dict_keys(['id', 'image_id', 'segmentation', 'category_id', 'iscrowd', 'bbox'])
        """
        annotations = []
        for _, mask in sub_df_images.iterrows():
            h = int(mask.get('height'))
            w = int(mask.get('width'))
            # np.fromstring(..., sep=" ") is deprecated; parse the
            # space-separated RLE string explicitly instead.
            counts = np.array(mask['EncodedPixels'].split(), dtype=int)
            ann_dict = {'id': mask['id'],
                        'image_id': mask['file_name'],
                        'segmentation': {'counts': counts, 'size': [h, w]},
                        # add 1 to make room for background; ClassId may look
                        # like "23_115" (category_attributes) -- keep category only
                        'category_id': int(mask['ClassId'].split('_')[0]) + 1,
                        'iscrowd': True,  # True indicates the use of uncompressed RLE
                        'bbox': []}
            annotations.append(ann_dict)
        return annotations

    def create_images(self, images_file: str, train_dir: str, imgids: list = None, limit: int = None) -> list:
        """
        Build the image_info['images'] dictionary element with all images.
        If imgids list is None, all images in the images_file will be included, otherwise,
        only the imgids in the list will be included.
        Returns self.image_info (the original annotation claimed a
        (dict, DataFrame) tuple, but only the image_info list is returned).
        """
        df_images = pd.read_csv(images_file, nrows=limit)
        # restrict the dataframe to items in imgids list, if list is provided
        if imgids is not None:
            df_images = df_images[df_images.ImageId.isin(imgids)]
        df_images.rename(columns={"ImageId": 'file_name', "Height": 'height', "Width": 'width'}, inplace=True)
        df_images['id'] = list(range(len(df_images)))
        df_images['source'] = 'imaterialist'
        dict_images = [dict(row) for _, row in tqdm(df_images.iterrows(), desc="Create images dict", total=len(df_images))]
        for image in tqdm(dict_images, desc="Add images to object"):
            file_path = os.path.join(train_dir, image['file_name'])
            self.add_image(source=image['source'],
                           image_id=image['id'],
                           path=file_path,
                           height=image['height'],
                           width=image['width'],
                           file_name=image['file_name'],
                           annotations=self.create_anns(df_images[df_images.file_name == image['file_name']]))
        print("Added {} images.".format(len(df_images)))
        return self.image_info

    def load_image(self, image_id):
        """
        Load the specified image and return a [H,W,3] Numpy array.
        """
        image = skimage.io.imread(self.image_info[image_id]['path'])
        # If grayscale. Convert to RGB for consistency.
        if image.ndim != 3:
            image = skimage.color.gray2rgb(image)
        # If has an alpha channel, remove it for consistency
        if image.shape[-1] == 4:
            image = image[..., :3]
        return image

    def load_mask(self, image_id):
        """Load instance masks for the given image.
        Different datasets use different ways to store masks. This
        function converts the different mask format to one format
        in the form of a bitmap [height, width, instances].
        Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        image_info = self.image_info[image_id]
        instance_masks = []
        class_ids = []
        # list of mask annotations for the image, built by create_anns()
        annotations = self.image_info[image_id]["annotations"]
        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in annotations:
            class_id = annotation['category_id']  # one of 46 categories
            if class_id:
                # updated to reflect problems with original maskutils implementation of decode
                m = self.kaggle_rle_decode(annotation, image_info["height"], image_info["width"])
                # Some objects are so small that they're less than 1 pixel area
                # and end up rounded out. Skip those objects.
                if m.max() < 1:
                    continue
                # Is it a crowd? If so, use a negative class ID.
                if annotation['iscrowd']:
                    # For crowd masks, annToMask() sometimes returns a mask
                    # smaller than the given dimensions. If so, resize it.
                    if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
                        m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
                instance_masks.append(m)
                class_ids.append(class_id)
        # Pack instance masks into an array
        if class_ids:
            # np.bool was removed in NumPy 1.24 -- use the builtin bool
            mask = np.stack(instance_masks, axis=2).astype(bool)
            class_ids = np.array(class_ids, dtype=np.int32)
            return mask, class_ids
        # Bug fix: the original fell through with ``mask`` unbound when no
        # valid masks were found, raising UnboundLocalError. Return empty
        # arrays instead, matching the Mask R-CNN convention.
        return (np.empty([image_info["height"], image_info["width"], 0], dtype=bool),
                np.empty([0], dtype=np.int32))

    def image_reference(self, image_id):
        """Return a link to the image in its source Website or details about
        the image that help looking it up or debugging it.
        Override for your dataset, but pass to this function
        if you encounter images not in your dataset.
        """
        # assume user provided the integer id of the image
        for img in self.image_info:
            if img['id'] == image_id:
                return img['path']
        # check if the user entered the file name
        for img in self.image_info:
            if img['file_name'] == image_id:
                return img['path']
        print("Image '{}' not found.".format(image_id))
        return None

    def kaggle_rle_decode(self, ann, h, w):
        """
        https://github.com/amirassov/kaggle-imaterialist/blob/master/src/rle.py
        Takes uncompressed RLE for a single mask. Returns binary mask.
        param: ann - annotation including uncompressed rle in ['segmentation']['counts']
            -- where counts is a list of integers. Also includes 'size' which is a list [int(h), int(w)]
        """
        rle = np.asarray(ann['segmentation']['counts'])
        # Bug fix: the original did ``starts -= 1`` on a view of the stored
        # counts array, mutating the annotation in place so a second decode
        # of the same annotation produced a different mask. Compute fresh
        # arrays instead.
        starts = rle[0::2] - 1  # RLE start positions are 1-based
        lengths = rle[1::2]
        ends = starts + lengths
        img = np.zeros(h * w, dtype=np.uint8)
        for lo, hi in zip(starts, ends):
            img[lo:hi] = 1
        # Kaggle RLE is column-major: fill as (w, h) then transpose to (h, w).
        return img.reshape((w, h)).T