From bda105011895b654e28548c42bd60a0e3401d810 Mon Sep 17 00:00:00 2001 From: Nioolek <40284075+Nioolek@users.noreply.github.com> Date: Mon, 6 Mar 2023 17:39:32 +0800 Subject: [PATCH] [Feature] YOLOv5 supports using mask annotation to optimize bbox (#565) * add v5 config and readme * fix config * update config * add remove mask * update * update * fix * update --------- Co-authored-by: huanghaian --- configs/yolov5/README.md | 28 +++--- ...-refine-v61_syncbn_fast_8xb16-300e_coco.py | 77 +++++++++++++++++ ...-refine-v61_syncbn_fast_8xb16-300e_coco.py | 86 +++++++++++++++++++ ...-refine-v61_syncbn_fast_8xb16-300e_coco.py | 20 +++++ ...-refine-v61_syncbn_fast_8xb16-300e_coco.py | 62 +++++++++++++ ...-refine-v61_syncbn_fast_8xb16-300e_coco.py | 21 +++++ configs/yolov5/metafile.yml | 72 ++++++++++++++++ 7 files changed, 355 insertions(+), 11 deletions(-) create mode 100644 configs/yolov5/mask_refine/yolov5_l_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py create mode 100644 configs/yolov5/mask_refine/yolov5_m_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py create mode 100644 configs/yolov5/mask_refine/yolov5_n_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py create mode 100644 configs/yolov5/mask_refine/yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py create mode 100644 configs/yolov5/mask_refine/yolov5_x_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py diff --git a/configs/yolov5/README.md b/configs/yolov5/README.md index b22d880fc..4f7d236a5 100644 --- a/configs/yolov5/README.md +++ b/configs/yolov5/README.md @@ -20,19 +20,24 @@ YOLOv5-l-P6 model structure ### COCO -| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | TTA box AP | Config | Download | -| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :--------: | :--------------------------------------------------------------------------------------------------------------------: | 
:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOv5-n | P5 | 640 | Yes | Yes | 1.5 | 28.0 | 30.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) | -| YOLOv5-s | P5 | 640 | Yes | Yes | 2.7 | 37.7 | 40.2 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) | -| YOLOv5-m | P5 | 640 | Yes | Yes | 5.0 | 45.3 | 46.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) | -| YOLOv5-l | P5 | 640 | Yes | Yes | 8.1 | 48.8 | 49.9 | 
[config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) | -| YOLOv5-n | P6 | 1280 | Yes | Yes | 5.8 | 35.9 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) | -| YOLOv5-s | P6 | 1280 | Yes | Yes | 10.5 | 44.4 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) | -| YOLOv5-m | P6 | 1280 | Yes | Yes | 19.1 | 51.3 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) | -| YOLOv5-l | P6 | 1280 | Yes | Yes | 30.5 | 53.7 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) | +| Backbone | Arch | size | Mask Refine | SyncBN | AMP | Mem (GB) | box AP | TTA box AP | Config | Download | +| :------: | :--: | :--: | :---------: | :----: | :-: | :------: | :---------: | :--------: | :-------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOv5-n | P5 | 640 | No | Yes | Yes | 1.5 | 28.0 | 30.7 | [config](../yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) | +| YOLOv5-n | P5 | 640 | Yes | Yes | Yes | 1.5 | 28.0 | | 
[config](../yolov5/mask_refine/yolov5_n_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_n_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_n_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230305_152706-712fb1b2.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_n_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_n_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230305_152706.log.json) | +| YOLOv5-s | P5 | 640 | No | Yes | Yes | 2.7 | 37.7 | 40.2 | [config](../yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) | +| YOLOv5-s | P5 | 640 | Yes | Yes | Yes | 2.7 | 38.0 (+0.3) | | [config](../yolov5/mask_refine/yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230304_033134-8e0cd271.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230304_033134.log.json) | +| YOLOv5-m | P5 | 640 | No | Yes | Yes | 5.0 | 45.3 | 46.9 | [config](../yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) | +| YOLOv5-m | 
P5 | 640 | Yes | Yes | Yes | 5.0 | 45.3 | | [config](../yolov5/mask_refine/yolov5_m_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_m_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_m_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230305_153946-44e96155.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_m_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_m_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230305_153946.log.json) | +| YOLOv5-l | P5 | 640 | No | Yes | Yes | 8.1 | 48.8 | 49.9 | [config](../yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) | +| YOLOv5-l | P5 | 640 | Yes | Yes | Yes | 8.1 | 49.3 (+0.5) | | [config](../yolov5/mask_refine/yolov5_l_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_l_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_l_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230305_154301-2c1d912a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_l_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_l_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230305_154301.log.json) | +| YOLOv5-x | P5 | 640 | No | Yes | Yes | 12.2 | 50.2 | | [config](../yolov5/yolov5_x-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_x-v61_syncbn_fast_8xb16-300e_coco/yolov5_x-v61_syncbn_fast_8xb16-300e_coco_20230305_152943-00776a4b.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_x-v61_syncbn_fast_8xb16-300e_coco/yolov5_x-v61_syncbn_fast_8xb16-300e_coco_20230305_152943.log.json) | +| YOLOv5-x | P5 | 640 | Yes | Yes | Yes | 12.2 | 50.9 (+0.7) | | [config](../yolov5/mask_refine/yolov5_x_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_x_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_x_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230305_154321-07edeb62.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_x_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_x_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230305_154321.log.json) | +| YOLOv5-n | P6 | 1280 | No | Yes | Yes | 5.8 | 35.9 | | [config](../yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) | +| YOLOv5-s | P6 | 1280 | No | Yes | Yes | 10.5 | 44.4 | | [config](../yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) | +| YOLOv5-m | P6 | 1280 | No | Yes | Yes | 19.1 | 51.3 | | [config](../yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) | +| YOLOv5-l | P6 | 1280 | No | Yes | Yes | 30.5 | 53.7 | | [config](../yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) | **Note**: -In the official YOLOv5 code, the `random_perspective` data augmentation in COCO object detection task training uses mask annotation information, which leads to higher performance. Object detection should not use mask annotation, so only box annotation information is used in `MMYOLO`. We will use the mask annotation information in the instance segmentation task. See https://github.com/ultralytics/yolov5/issues/9917 for details. 1. `fast` means that `YOLOv5DetDataPreprocessor` and `yolov5_collate` are used for data preprocessing, which is faster for training, but less flexible for multitasking. Recommended to use fast version config if you only care about object detection. 2. `detect` means that the network input is fixed to `640x640` and the post-processing thresholds is modified. @@ -40,6 +45,7 @@ In the official YOLOv5 code, the `random_perspective` data augmentation in COCO 4. We use 8x A100 for training, and the single-GPU batch size is 16. This is different from the official code. 5. The performance is unstable and may fluctuate by about 0.4 mAP and the highest performance weight in `COCO` training in `YOLOv5` may not be the last epoch. 6. `TTA` means that Test Time Augmentation. It's perform 3 multi-scaling transformations on the image, followed by 2 flipping transformations (flipping and not flipping). 
You only need to specify `--tta` when testing to enable. see [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details. +7. The performance of `Mask Refine` training corresponds to the performance of the weights officially released by YOLOv5. `Mask Refine` means refining bboxes by mask both while loading annotations and after the `YOLOv5RandomAffine` transform; `Copy Paste` means using `YOLOv5CopyPaste`. ### VOC diff --git a/configs/yolov5/mask_refine/yolov5_l_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/mask_refine/yolov5_l_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py new file mode 100644 index 000000000..206eec3c4 --- /dev/null +++ b/configs/yolov5/mask_refine/yolov5_l_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py @@ -0,0 +1,77 @@ +_base_ = './yolov5_m_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py' + +# This config uses bbox refinement and `YOLOv5CopyPaste`. +# Refining bbox means refining bbox by mask while loading annotations and +# transforming after `YOLOv5RandomAffine` + +# ========================modified parameters====================== +deepen_factor = 1.0 +widen_factor = 1.0 + +mixup_prob = 0.1 +copypaste_prob = 0.1 + +# =======================Unmodified in most cases================== +img_scale = _base_.img_scale + +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + neck=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +pre_transform = _base_.pre_transform +albu_train_transforms = _base_.albu_train_transforms + +mosaic_affine_pipeline = [ + dict( + type='Mosaic', + img_scale=img_scale, + pad_val=114.0, + pre_transform=pre_transform), + dict(type='YOLOv5CopyPaste', prob=copypaste_prob), + dict( + type='YOLOv5RandomAffine', + max_rotate_degree=0.0, + max_shear_degree=0.0, + scaling_ratio_range=(1 - _base_.affine_scale, 1 + _base_.affine_scale), + # img_scale is (width, height) + 
border=(-img_scale[0] // 2, -img_scale[1] // 2), + border_val=(114, 114, 114), + min_area_ratio=_base_.min_area_ratio, + use_mask_refine=_base_.use_mask2refine), + dict(type='RemoveDataElement', keys=['gt_masks']) +] + +# enable mixup and copypaste +train_pipeline = [ + *pre_transform, *mosaic_affine_pipeline, + dict( + type='YOLOv5MixUp', + prob=mixup_prob, + pre_transform=[*pre_transform, *mosaic_affine_pipeline]), + dict( + type='mmdet.Albu', + transforms=albu_train_transforms, + bbox_params=dict( + type='BboxParams', + format='pascal_voc', + label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), + keymap={ + 'img': 'image', + 'gt_bboxes': 'bboxes' + }), + dict(type='YOLOv5HSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', + 'flip_direction')) +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/yolov5/mask_refine/yolov5_m_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/mask_refine/yolov5_m_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py new file mode 100644 index 000000000..4af27a917 --- /dev/null +++ b/configs/yolov5/mask_refine/yolov5_m_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py @@ -0,0 +1,86 @@ +_base_ = './yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py' + +# This config will refine bbox by mask while loading annotations and +# transforming after `YOLOv5RandomAffine` + +# ========================modified parameters====================== +deepen_factor = 0.67 +widen_factor = 0.75 +lr_factor = 0.1 +loss_cls_weight = 0.3 +loss_obj_weight = 0.7 + +affine_scale = 0.9 +mixup_prob = 0.1 + +# =======================Unmodified in most cases================== +num_classes = _base_.num_classes +num_det_layers = _base_.num_det_layers +img_scale = _base_.img_scale + +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + neck=dict( + 
deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + bbox_head=dict( + head_module=dict(widen_factor=widen_factor), + loss_cls=dict(loss_weight=loss_cls_weight * + (num_classes / 80 * 3 / num_det_layers)), + loss_obj=dict(loss_weight=loss_obj_weight * + ((img_scale[0] / 640)**2 * 3 / num_det_layers)))) + +pre_transform = _base_.pre_transform +albu_train_transforms = _base_.albu_train_transforms + +mosaic_affine_pipeline = [ + dict( + type='Mosaic', + img_scale=img_scale, + pad_val=114.0, + pre_transform=pre_transform), + dict( + type='YOLOv5RandomAffine', + max_rotate_degree=0.0, + max_shear_degree=0.0, + scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), + # img_scale is (width, height) + border=(-img_scale[0] // 2, -img_scale[1] // 2), + border_val=(114, 114, 114), + min_area_ratio=_base_.min_area_ratio, + use_mask_refine=_base_.use_mask2refine), + dict(type='RemoveDataElement', keys=['gt_masks']) +] + +# enable mixup +train_pipeline = [ + *pre_transform, *mosaic_affine_pipeline, + dict( + type='YOLOv5MixUp', + prob=mixup_prob, + pre_transform=[*pre_transform, *mosaic_affine_pipeline]), + dict( + type='mmdet.Albu', + transforms=albu_train_transforms, + bbox_params=dict( + type='BboxParams', + format='pascal_voc', + label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), + keymap={ + 'img': 'image', + 'gt_bboxes': 'bboxes' + }), + dict(type='YOLOv5HSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', + 'flip_direction')) +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) +default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor)) diff --git a/configs/yolov5/mask_refine/yolov5_n_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/mask_refine/yolov5_n_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py new file mode 100644 index 000000000..3fe8dc32c --- /dev/null +++ 
b/configs/yolov5/mask_refine/yolov5_n_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py @@ -0,0 +1,20 @@ +_base_ = './yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py' + +# This config will refine bbox by mask while loading annotations and +# transforming after `YOLOv5RandomAffine` + +# ========================modified parameters====================== +deepen_factor = 0.33 +widen_factor = 0.25 + +# ===============================Unmodified in most cases==================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + neck=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) diff --git a/configs/yolov5/mask_refine/yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/mask_refine/yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py new file mode 100644 index 000000000..3f4fa588f --- /dev/null +++ b/configs/yolov5/mask_refine/yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py @@ -0,0 +1,62 @@ +_base_ = '../yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' + +# This config will refine bbox by mask while loading annotations and +# transforming after `YOLOv5RandomAffine` + +# ========================modified parameters====================== +use_mask2refine = True +min_area_ratio = 0.01 # YOLOv5RandomAffine + +# ===============================Unmodified in most cases==================== +pre_transform = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + mask2bbox=use_mask2refine) +] + +last_transform = [ + # Delete gt_masks to avoid more computation + dict(type='RemoveDataElement', keys=['gt_masks']), + dict( + type='mmdet.Albu', + transforms=_base_.albu_train_transforms, + bbox_params=dict( + type='BboxParams', + format='pascal_voc', + label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), + keymap={ + 'img': 
'image', + 'gt_bboxes': 'bboxes' + }), + dict(type='YOLOv5HSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', + 'flip_direction')) +] + +train_pipeline = [ + *pre_transform, + dict( + type='Mosaic', + img_scale=_base_.img_scale, + pad_val=114.0, + pre_transform=pre_transform), + dict( + type='YOLOv5RandomAffine', + max_rotate_degree=0.0, + max_shear_degree=0.0, + scaling_ratio_range=(1 - _base_.affine_scale, 1 + _base_.affine_scale), + # img_scale is (width, height) + border=(-_base_.img_scale[0] // 2, -_base_.img_scale[1] // 2), + border_val=(114, 114, 114), + min_area_ratio=min_area_ratio, + use_mask_refine=use_mask2refine), + *last_transform +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/yolov5/mask_refine/yolov5_x_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/mask_refine/yolov5_x_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py new file mode 100644 index 000000000..fb76f1057 --- /dev/null +++ b/configs/yolov5/mask_refine/yolov5_x_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py @@ -0,0 +1,21 @@ +_base_ = './yolov5_l_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py' + +# This config uses bbox refinement and `YOLOv5CopyPaste`. 
+# Refining bbox means refining bbox by mask while loading annotations and +# transforming after `YOLOv5RandomAffine` + +# ========================modified parameters====================== +deepen_factor = 1.33 +widen_factor = 1.25 + +# ===============================Unmodified in most cases==================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + neck=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + ), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) diff --git a/configs/yolov5/metafile.yml b/configs/yolov5/metafile.yml index c64f38e5b..bfa92bdbe 100644 --- a/configs/yolov5/metafile.yml +++ b/configs/yolov5/metafile.yml @@ -80,6 +80,18 @@ Models: Metrics: box AP: 48.8 Weights: https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth + - Name: yolov5_x-v61_syncbn_fast_8xb16-300e_coco + In Collection: YOLOv5 + Config: configs/yolov5/yolov5_x-v61_syncbn_fast_8xb16-300e_coco.py + Metadata: + Training Memory (GB): 12.2 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 50.2 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_x-v61_syncbn_fast_8xb16-300e_coco/yolov5_x-v61_syncbn_fast_8xb16-300e_coco_20230305_152943-00776a4b.pth - Name: yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco In Collection: YOLOv5 Config: configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py @@ -176,3 +188,63 @@ Models: Metrics: box AP: 73.1 Weights: https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_fast_1xb32-50e_voc/yolov5_l-v61_fast_1xb32-50e_voc_20221017_045500-edc7e0d8.pth + - Name: yolov5_n_mask-refine-v61_syncbn_fast_8xb16-300e_coco + In Collection: YOLOv5 + Config: configs/yolov5/mask_refine/yolov5_n_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py + Metadata: + Training Memory (GB): 1.5 + Epochs: 300 + Results: + - Task: Object 
Detection + Dataset: COCO + Metrics: + box AP: 28.0 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_n_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_n_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230305_152706-712fb1b2.pth + - Name: yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco + In Collection: YOLOv5 + Config: configs/yolov5/mask_refine/yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py + Metadata: + Training Memory (GB): 2.7 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.0 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_s_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230304_033134-8e0cd271.pth + - Name: yolov5_m_mask-refine-v61_syncbn_fast_8xb16-300e_coco + In Collection: YOLOv5 + Config: configs/yolov5/mask_refine/yolov5_m_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py + Metadata: + Training Memory (GB): 5.0 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.3 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_m_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_m_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230305_153946-44e96155.pth + - Name: yolov5_l_mask-refine-v61_syncbn_fast_8xb16-300e_coco + In Collection: YOLOv5 + Config: configs/yolov5/mask_refine/yolov5_l_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py + Metadata: + Training Memory (GB): 8.1 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 49.3 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_l_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_l_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230305_154301-2c1d912a.pth + - Name: yolov5_x_mask-refine-v61_syncbn_fast_8xb16-300e_coco + In Collection: YOLOv5 + Config: configs/yolov5/mask_refine/yolov5_x_mask-refine-v61_syncbn_fast_8xb16-300e_coco.py + 
Metadata: + Training Memory (GB): 12.2 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 50.9 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov5/mask_refine/yolov5_x_mask-refine-v61_syncbn_fast_8xb16-300e_coco/yolov5_x_mask-refine-v61_syncbn_fast_8xb16-300e_coco_20230305_154321-07edeb62.pth