50
50
[suggestion 0]
51
51
The image is very large and the items you need to detect are small.
52
52
53
- Step 1: You should start by splitting the image into sections and runing the detection algorithm on each section:
53
+ Step 1: You should start by splitting the image into overlapping sections and running the detection algorithm on each section:
54
54
55
55
def subdivide_image(image):
56
56
height, width, _ = image.shape
@@ -66,41 +66,96 @@ def subdivide_image(image):
66
66
67
67
get_tool_for_task('<your prompt here>', subdivide_image(image))
68
68
69
- Step 2: Once you have the detections from each subdivided image, you will need to merge them back together to remove overlapping predictions:
70
-
71
- def translate_ofset(bbox, offset_x, offset_y):
72
- return (bbox[0] + offset_x, bbox[1] + offset_y, bbox[2] + offset_x, bbox[3] + offset_y)
73
-
74
- def bounding_boxes_overlap(bbox1, bbox2):
75
- if bbox1[2] < bbox2[0] or bbox2[0] > bbox1[2]:
76
- return False
77
- if bbox1[3] < bbox2[1] or bbox2[3] > bbox1[3]:
78
- return False
79
- return True
80
-
81
- def merge_bounding_boxes(bbox1, bbox2):
82
- x_min = min(bbox1[0], bbox2[0])
83
- y_min = min(bbox1[1], bbox2[1])
84
- x_max = max(bbox1[2], bbox2[2])
85
- y_max = max(bbox1[3], bbox2[3])
86
- return (x_min, y_min, x_max, y_max)
87
-
88
- def merge_bounding_box_list(bboxes):
89
- merged_bboxes = []
90
- while bboxes:
91
- bbox = bboxes.pop()
92
- overlap_found = False
93
- for i, other_bbox in enumerate(merged_bboxes):
94
- if bounding_boxes_overlap(bbox, other_bbox):
95
- merged_bboxes[i] = merge_bounding_boxes(bbox, other_bbox)
96
- overlap_found = True
69
+ Step 2: Once you have the detections from each subdivided image, you will need to merge them back together to remove overlapping predictions, be sure to translate the offset back to the original image:
70
+
71
+ def bounding_box_match(b1: List[float], b2: List[float], iou_threshold: float = 0.1) -> bool:
72
+ # Calculate intersection coordinates
73
+ x1 = max(b1[0], b2[0])
74
+ y1 = max(b1[1], b2[1])
75
+ x2 = min(b1[2], b2[2])
76
+ y2 = min(b1[3], b2[3])
77
+
78
+ # Calculate intersection area
79
+ if x2 < x1 or y2 < y1:
80
+ return False # No overlap
81
+
82
+ intersection = (x2 - x1) * (y2 - y1)
83
+
84
+ # Calculate union area
85
+ area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
86
+ area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
87
+ union = area1 + area2 - intersection
88
+
89
+ # Calculate IoU
90
+ iou = intersection / union if union > 0 else 0
91
+
92
+ return iou >= iou_threshold
93
+
94
+ def merge_bounding_box_list(detections):
95
+ merged_detections = []
96
+ for detection in detections:
97
+ matching_box = None
98
+ for i, other in enumerate(merged_detections):
99
+ if bounding_box_match(detection["bbox"], other["bbox"]):
100
+ matching_box = i
97
101
break
98
- if not overlap_found:
99
- p
100
- merged_bboxes.append(bbox)
101
- return merged_bboxes
102
102
103
- detection = merge_bounding_box_list(detection_from_subdivided_images)
103
+ if matching_box is not None:
104
+ # Keep the box with higher confidence score
105
+ if detection["score"] > merged_detections[matching_box]["score"]:
106
+ merged_detections[matching_box] = detection
107
+ else:
108
+ merged_detections.append(detection)
+ return merged_detections
109
+
110
+ def sub_image_to_original(elt, sub_image_position, original_size):
111
+ offset_x, offset_y = sub_image_position
112
+ return {
113
+ "label": elt["label"],
114
+ "score": elt["score"],
115
+ "bbox": [
116
+ (elt["bbox"][0] + offset_x) / original_size[1],
117
+ (elt["bbox"][1] + offset_y) / original_size[0],
118
+ (elt["bbox"][2] + offset_x) / original_size[1],
119
+ (elt["bbox"][3] + offset_y) / original_size[0],
120
+ ],
121
+ }
122
+
123
+ def normalized_to_unnormalized(elt, image_size):
124
+ return {
125
+ "label": elt["label"],
126
+ "score": elt["score"],
127
+ "bbox": [
128
+ elt["bbox"][0] * image_size[1],
129
+ elt["bbox"][1] * image_size[0],
130
+ elt["bbox"][2] * image_size[1],
131
+ elt["bbox"][3] * image_size[0],
132
+ ],
133
+ }
134
+
135
+ height, width, _ = image.shape
136
+ mid_width = width // 2
137
+ mid_height = height // 2
138
+
139
+ detection_from_subdivided_images = []
140
+ for i, sub_image in enumerate(subdivided_images):
141
+ detections = <your detection function here>("pedestrian", sub_image)
142
+ unnorm_detections = [
143
+ normalized_to_unnormalized(
144
+ detection, (sub_image.shape[0], sub_image.shape[1])
145
+ )
146
+ for detection in detections
147
+ ]
148
+ offset_x = i % 2 * (mid_width - int(mid_width * 0.1))
149
+ offset_y = i // 2 * (mid_height - int(mid_height * 0.1))
150
+ offset_detections = [
151
+ sub_image_to_original(
152
+ unnorm_detection, (offset_x, offset_y), (height, width)
153
+ )
154
+ for unnorm_detection in unnorm_detections
155
+ ]
156
+ detection_from_subdivided_images.extend(offset_detections)
157
+
158
+ detections = merge_bounding_box_list(detection_from_subdivided_images)
104
159
[end of suggestion 0]
105
160
[end of suggestion]
106
161
<count>9</count>
@@ -164,36 +219,44 @@ def subdivide_image(image):
164
219
165
220
AGENT: <thinking>I need to now merge the boxes from all region and use the countgd_object_detection tool with the prompt 'pedestrian' as suggested by get_tool_for_task.</thinking>
166
221
<execute_python>
167
- def translate_ofset(bbox, offset_x, offset_y):
168
- return (bbox[0] + offset_x, bbox[1] + offset_y, bbox[2] + offset_x, bbox[3] + offset_y)
169
-
170
- def bounding_boxes_overlap(bbox1, bbox2):
171
- if bbox1[2] < bbox2[0] or bbox2[0] > bbox1[2]:
172
- return False
173
- if bbox1[3] < bbox2[1] or bbox2[3] > bbox1[3]:
174
- return False
175
- return True
176
-
177
- def merge_bounding_boxes(bbox1, bbox2):
178
- x_min = min(bbox1[0], bbox2[0])
179
- y_min = min(bbox1[1], bbox2[1])
180
- x_max = max(bbox1[2], bbox2[2])
181
- y_max = max(bbox1[3], bbox2[3])
182
- return (x_min, y_min, x_max, y_max)
183
-
184
- def merge_bounding_box_list(bboxes):
185
- merged_bboxes = []
186
- while bboxes:
187
- bbox = bboxes.pop()
188
- overlap_found = False
189
- for i, other_bbox in enumerate(merged_bboxes):
190
- if bounding_boxes_overlap(bbox, other_bbox):
191
- merged_bboxes[i] = merge_bounding_boxes(bbox, other_bbox)
192
- overlap_found = True
222
+ def bounding_box_match(b1: List[float], b2: List[float], iou_threshold: float = 0.1) -> bool:
223
+ # Calculate intersection coordinates
224
+ x1 = max(b1[0], b2[0])
225
+ y1 = max(b1[1], b2[1])
226
+ x2 = min(b1[2], b2[2])
227
+ y2 = min(b1[3], b2[3])
228
+
229
+ # Calculate intersection area
230
+ if x2 < x1 or y2 < y1:
231
+ return False # No overlap
232
+
233
+ intersection = (x2 - x1) * (y2 - y1)
234
+
235
+ # Calculate union area
236
+ area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
237
+ area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
238
+ union = area1 + area2 - intersection
239
+
240
+ # Calculate IoU
241
+ iou = intersection / union if union > 0 else 0
242
+
243
+ return iou >= iou_threshold
244
+
245
+ def merge_bounding_box_list(detections):
246
+ merged_detections = []
247
+ for detection in detections:
248
+ matching_box = None
249
+ for i, other in enumerate(merged_detections):
250
+ if bounding_box_match(detection["bbox"], other["bbox"]):
251
+ matching_box = i
193
252
break
194
- if not overlap_found:
195
- merged_bboxes.append(bbox)
196
- return merged_bboxes
253
+
254
+ if matching_box is not None:
255
+ # Keep the box with higher confidence score
256
+ if detection["score"] > merged_detections[matching_box]["score"]:
257
+ merged_detections[matching_box] = detection
258
+ else:
259
+ merged_detections.append(detection)
+ return merged_detections
197
260
198
261
detections = []
199
262
for region in subdivide_image(image):
0 commit comments