Fix LetterResize bug with imrescale api (#105)

* [Feature] Make docker image smaller (#67)
  * [Feature] Make docker image smaller
  * Improve doc
* Use Compose directly in BaseMixImageTransform (#71)
  * Use 'Compose' directly
    Use 'Compose' directly
  * avoids some unnecessary calculations
    avoids some unnecessary calculations.
  * remove whitespace
    remove whitespace
* fix resize bug
* fix resize bug
* fix resize bug
* fix
* fix resize bug
* add test letterresize without batchshape
* fix resize bug

Co-authored-by: HinGwenWoong <peterhuang0323@qq.com>
Co-authored-by: jason_w <wongzheng@126.com>
open-mmlab · Sep 29, 2022 · 0d97ffe · 0d97ffe
1 parent bb3aa48
commit 0d97ffe
Show file tree

Hide file tree

Showing 2 changed files with 57 additions and 22 deletions.
diff --git a/mmyolo/datasets/transforms/transforms.py b/mmyolo/datasets/transforms/transforms.py
@@ -167,13 +167,14 @@ def _resize_img(self, results: dict):
 
         # Use batch_shape if a batch_shape policy is configured
         if 'batch_shape' in results:
-            self.scale = tuple(results['batch_shape'])
+            scale = tuple(results['batch_shape'])
+        else:
+            scale = self.scale
 
         image_shape = image.shape[:2]  # height, width
 
         # Scale ratio (new / old)
-        ratio = min(self.scale[0] / image_shape[0],
-                    self.scale[1] / image_shape[1])
+        ratio = min(scale[0] / image_shape[0], scale[1] / image_shape[1])
 
         # only scale down, do not scale up (for better test mAP)
         if not self.allow_scale_up:
@@ -187,7 +188,7 @@ def _resize_img(self, results: dict):
 
         # padding height & width
         padding_h, padding_w = [
-            self.scale[0] - no_pad_shape[0], self.scale[1] - no_pad_shape[1]
+            scale[0] - no_pad_shape[0], scale[1] - no_pad_shape[1]
         ]
         if self.use_mini_pad:
             # minimum rectangle padding
@@ -196,20 +197,14 @@ def _resize_img(self, results: dict):
         elif self.stretch_only:
             # stretch to the specified size directly
             padding_h, padding_w = 0.0, 0.0
-            no_pad_shape = (self.scale[0], self.scale[1])
-            ratio = [
-                self.scale[0] / image_shape[0], self.scale[1] / image_shape[1]
-            ]  # height, width ratios
-
-        # divide padding into 2 sides
-        padding_h /= 2
-        padding_w /= 2
+            no_pad_shape = (scale[0], scale[1])
+            ratio = [scale[0] / image_shape[0],
+                     scale[1] / image_shape[1]]  # height, width ratios
 
-        if image_shape[::-1] != no_pad_shape:
+        if image_shape != no_pad_shape:
             # compare with no resize and padding size
-            image = mmcv.imrescale(
-                image,
-                no_pad_shape,
+            image = mmcv.imresize(
+                image, (no_pad_shape[1], no_pad_shape[0]),
                 interpolation=self.interpolation,
                 backend=self.backend)
 
@@ -221,10 +216,10 @@ def _resize_img(self, results: dict):
             results['scale_factor'] = scale_factor
 
         # padding
-        top_padding, bottom_padding = int(round(padding_h - 0.1)), int(
-            round(padding_h + 0.1))
-        left_padding, right_padding = int(round(padding_w - 0.1)), int(
-            round(padding_w + 0.1))
+        top_padding, left_padding = int(round(padding_h // 2 - 0.1)), int(
+            round(padding_w // 2 - 0.1))
+        bottom_padding = padding_h - top_padding
+        right_padding = padding_w - left_padding
 
         padding_list = [
             top_padding, bottom_padding, left_padding, right_padding
@@ -257,7 +252,8 @@ def _resize_masks(self, results: dict):
             results['scale_factor'][0]
         gt_mask_width = results['gt_masks'].width * \
             results['scale_factor'][1]
-        gt_masks = results['gt_masks'].rescale((gt_mask_height, gt_mask_width))
+        gt_masks = results['gt_masks'].resize(
+            (int(round(gt_mask_height)), int(round(gt_mask_width))))
 
         # padding the gt_masks
         if len(gt_masks) == 0:

diff --git a/tests/test_datasets/test_transforms/test_transforms.py b/tests/test_datasets/test_transforms/test_transforms.py
@@ -75,12 +75,51 @@ def test_letter_resize(self):
         # Test stretch_only
         transform = LetterResize(scale=(640, 640), stretch_only=True)
         results = transform(copy.deepcopy(self.data_info1))
-        self.assertEqual(results['img_shape'], (460, 613, 3))
+        self.assertEqual(results['img_shape'], (460, 672, 3))
         self.assertTrue((results['gt_bboxes'] == np.array(
             [[0., 0., 230., 251.99998474121094]])).all())
         self.assertTrue((results['batch_shape'] == np.array([460, 672])).all())
         self.assertTrue((results['pad_param'] == np.array([0, 0, 0, 0])).all())
 
+        # Test with a random batch_shape: output must match batch_shape
+        transform = LetterResize(scale=(640, 640), pad_val=dict(img=144))
+        rng = np.random.RandomState(0)
+        for _ in range(20):
+            input_h, input_w = np.random.randint(100, 700), np.random.randint(
+                100, 700)
+            output_h, output_w = np.random.randint(100,
+                                                   700), np.random.randint(
+                                                       100, 700)
+            data_info = dict(
+                img=np.random.random((input_h, input_w, 3)),
+                gt_bboxes=np.array([[0, 0, 10, 10]], dtype=np.float32),
+                batch_shape=np.array([output_h, output_w], dtype=np.int64),
+                gt_masks=BitmapMasks(
+                    rng.rand(1, input_h, input_w),
+                    height=input_h,
+                    width=input_w))
+            results = transform(data_info)
+            self.assertEqual(results['img_shape'], (output_h, output_w, 3))
+            self.assertTrue(
+                (results['batch_shape'] == np.array([output_h,
+                                                     output_w])).all())
+
+        # Test without batch_shape: output must match the configured scale
+        transform = LetterResize(scale=(640, 640), pad_val=dict(img=144))
+        rng = np.random.RandomState(0)
+        for _ in range(20):
+            input_h, input_w = np.random.randint(100, 700), np.random.randint(
+                100, 700)
+            data_info = dict(
+                img=np.random.random((input_h, input_w, 3)),
+                gt_bboxes=np.array([[0, 0, 10, 10]], dtype=np.float32),
+                gt_masks=BitmapMasks(
+                    rng.rand(1, input_h, input_w),
+                    height=input_h,
+                    width=input_w))
+            results = transform(data_info)
+            self.assertEqual(results['img_shape'], (640, 640, 3))
+
 
 class TestYOLOv5KeepRatioResize(unittest.TestCase):