init

open-mmlab · Jul 1, 2023 · 37a943f · 37a943f
1 parent c40a2d4
commit 37a943f
Show file tree

Hide file tree

Showing 9 changed files with 744 additions and 0 deletions.
diff --git a/projects/uniformer/README.md b/projects/uniformer/README.md
@@ -0,0 +1,31 @@
+# Pose Estimation with UniFormer
+
+This project implements a top-down, heatmap-based human pose estimator, utilizing the approach outlined in **UniFormer: Unifying Convolution and Self-attention for Visual Recognition** (TPAMI 2023) and **UniFormer: Unified Transformer for Efficient Spatiotemporal Representation Learning** (ICLR 2022).
+
+## Usage
+
+## Citation
+
+If this project benefits your work, please kindly consider citing the original papers:
+
+```bibtex
+@misc{li2022uniformer,
+      title={UniFormer: Unifying Convolution and Self-attention for Visual Recognition}, 
+      author={Kunchang Li and Yali Wang and Junhao Zhang and Peng Gao and Guanglu Song and Yu Liu and Hongsheng Li and Yu Qiao},
+      year={2022},
+      eprint={2201.09450},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV}
+}
+```
+
+```bibtex
+@misc{li2022uniformer_video,
+      title={UniFormer: Unified Transformer for Efficient Spatiotemporal Representation Learning}, 
+      author={Kunchang Li and Yali Wang and Peng Gao and Guanglu Song and Yu Liu and Hongsheng Li and Yu Qiao},
+      year={2022},
+      eprint={2201.04676},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV}
+}
+```
diff --git a/projects/uniformer/configs/_base_/td-hm_uniformer-b-8xb32-210e_coco-256x192.py b/projects/uniformer/configs/_base_/td-hm_uniformer-b-8xb32-210e_coco-256x192.py
@@ -0,0 +1,133 @@
+# Base config: inherit the default runtime settings shipped with mmpose
+# (the `mmpose::` prefix refers to a config inside the mmpose package).
+_base_ = ['mmpose::_base_/default_runtime.py']
+
+# import the project-local model package when this config is loaded
+custom_imports = dict(imports='projects.uniformer.models')
+
+# runtime: 210 training epochs, validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up over the first 500
+# iterations, then epoch-based step decay (x0.1 at epochs 170 and 200)
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+# (the learning rate above is defined for a total batch size of 256)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: save a checkpoint every 5 epochs and keep the one with the
+# highest `coco/AP`
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater', interval=5))
+
+# codec settings: input_size=(192, 256); heatmaps are 1/4 of that size
+codec = dict(
+    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    # NOTE(review): the estimator `type` is identical to the backbone `type`
+    # below -- confirm that 'Uniformer' really is the top-level estimator.
+    type='Uniformer',
+    # Pretrained backbone weights are configured via `backbone.init_cfg`.
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='Uniformer',
+        # per-stage settings: four entries each for width and depth
+        embed_dim=[64, 128, 320, 512],
+        layers=[5, 8, 20, 7],
+        head_dim=64,
+        drop_path_rate=0.4,
+        use_checkpoint=False,
+        windows=False,
+        hybrid=False,
+        # NOTE(review): the checkpoint path is empty -- set it to the
+        # pretrained backbone weights before training.
+        init_cfg=dict(type='Pretrained', checkpoint='')),
+    head=dict(
+        type='TopdownSimpleHead',
+        # 512 equals the last entry of the backbone's `embed_dim`
+        in_channels=512,
+        # presumably one heatmap per COCO keypoint -- TODO confirm
+        out_channels=17,
+        norm_cfg=norm_cfg,
+        extra=dict(final_conv_kernel=1, ),
+        loss=dict(type='JointMSELoss', use_target_weight=True),
+        # the head decodes predictions with the same codec defined above
+        decoder=codec),
+    # test-time flip augmentation applied on heatmaps
+    test_cfg=dict(
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings: COCO in top-down mode, rooted at `data/coco/`
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+# Training: load -> bbox center/scale -> random augmentations -> affine warp
+# to the codec input size -> target generation with the codec -> packing.
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+
+# Validation: same geometric preparation without random augmentation and
+# without target generation.
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders
+# NOTE(review): batch_size=128 here, while the file name says `8xb32` and
+# `auto_scale_lr` assumes a total batch of 256 -- confirm the intended value.
+train_dataloader = dict(
+    batch_size=128,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+# Validation reads person boxes from a detection-result file (`bbox_file`).
+val_dataloader = dict(
+    batch_size=256,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        # NOTE(review): this path hard-codes `data/coco/` instead of being
+        # derived from `data_root`; keep both in sync when moving the data.
+        bbox_file='data/coco/person_detection_results/'
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+# testing reuses the validation data loader unchanged
+test_dataloader = val_dataloader
+
+# evaluators: COCO metric on the val2017 keypoint annotations; the test
+# evaluator is the same object as the validation one
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/projects/uniformer/configs/_base_/td-hm_uniformer-b-8xb32-210e_coco-384x288.py b/projects/uniformer/configs/_base_/td-hm_uniformer-b-8xb32-210e_coco-384x288.py
@@ -0,0 +1,22 @@
+# UniFormer-B at 384x288 input: overrides of the 256x192 base config.
+_base_ = ['./td-hm_uniformer-b-8xb32-210e_coco-256x192.py']
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=2e-3,
+))
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+    type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# The base file already evaluated everything derived from its own `codec`
+# (head decoder, pipelines) when it was parsed. Re-declaring `codec` alone
+# would leave those at 256x192, so each derived entry is overridden here.
+model = dict(
+    # Pretrained backbone weights are configured via `backbone.init_cfg`.
+    backbone=dict(drop_path_rate=0.4),
+    head=dict(decoder=codec),
+    test_cfg=dict())
+
+# pipelines (same steps as the base config, rebuilt with the new codec)
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: new batch sizes plus the rebuilt pipelines
+train_dataloader = dict(batch_size=32, dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(batch_size=256, dataset=dict(pipeline=val_pipeline))
+test_dataloader = val_dataloader
diff --git a/projects/uniformer/configs/_base_/td-hm_uniformer-b-8xb32-210e_coco-448x320.py b/projects/uniformer/configs/_base_/td-hm_uniformer-b-8xb32-210e_coco-448x320.py
@@ -0,0 +1,16 @@
+# UniFormer-B at 448x320 input: overrides of the 256x192 base config.
+_base_ = ['./td-hm_uniformer-b-8xb32-210e_coco-256x192.py']
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater', interval=10))
+
+# codec settings
+# input_size is (320, 448): it matches the 448x320 in the file name and is
+# 4x the heatmap size (80, 112), like the other resolutions.
+codec = dict(
+    type='MSRAHeatmap', input_size=(320, 448), heatmap_size=(80, 112), sigma=3)
+
+# The base file already evaluated everything derived from its own `codec`
+# (head decoder, pipelines) when it was parsed. Re-declaring `codec` alone
+# would leave those at 256x192, so each derived entry is overridden here.
+model = dict(
+    # Pretrained backbone weights are configured via `backbone.init_cfg`.
+    backbone=dict(drop_path_rate=0.55),
+    head=dict(decoder=codec),
+    test_cfg=dict())
+
+# pipelines (same steps as the base config, rebuilt with the new codec)
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: new batch sizes plus the rebuilt pipelines
+train_dataloader = dict(batch_size=32, dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(batch_size=256, dataset=dict(pipeline=val_pipeline))
+test_dataloader = val_dataloader
diff --git a/projects/uniformer/configs/td-hm_uniformer-s-8xb32-210e_coco-256x192.py b/projects/uniformer/configs/td-hm_uniformer-s-8xb32-210e_coco-256x192.py
@@ -0,0 +1,8 @@
+# UniFormer-S at 256x192: the UniFormer-B base config with a shallower
+# backbone and a lower drop-path rate. The base path must carry its `.py`
+# extension, otherwise the file cannot be located.
+_base_ = ['./_base_/td-hm_uniformer-b-8xb32-210e_coco-256x192.py']
+
+model = dict(
+    # Pretrained backbone weights are configured via `backbone.init_cfg`
+    # in the base config.
+    backbone=dict(
+        layers=[3, 4, 8, 3],
+        drop_path_rate=0.2
+    ))
diff --git a/projects/uniformer/configs/td-hm_uniformer-s-8xb32-210e_coco-384x288.py b/projects/uniformer/configs/td-hm_uniformer-s-8xb32-210e_coco-384x288.py
@@ -0,0 +1,24 @@
+# UniFormer-S at 384x288: the UniFormer-B 384x288 config with a shallower
+# backbone and a lower drop-path rate.
+_base_ = ['./_base_/td-hm_uniformer-b-8xb32-210e_coco-384x288.py']
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater', interval=5))
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=2e-3,
+))
+
+# codec settings
+# NOTE(review): re-declaring `codec` only replaces the top-level `codec`
+# key; entries the base files already derived from their own codec (head
+# decoder, pipelines) are not recomputed from this value.
+codec = dict(
+    type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+model = dict(
+    # Pretrained backbone weights are configured via `backbone.init_cfg`
+    # in the base config.
+    backbone=dict(
+        layers=[3, 4, 8, 3],
+        drop_path_rate=0.2),
+    test_cfg=dict())
+
+train_dataloader = dict(batch_size=32)
+val_dataloader = dict(batch_size=256)
diff --git a/projects/uniformer/configs/td-hm_uniformer-s-8xb32-210e_coco-448x320.py b/projects/uniformer/configs/td-hm_uniformer-s-8xb32-210e_coco-448x320.py
@@ -0,0 +1,24 @@
+# UniFormer-S at 448x320: the UniFormer-B 448x320 config with a shallower
+# backbone and a lower drop-path rate.
+_base_ = ['./_base_/td-hm_uniformer-b-8xb32-210e_coco-448x320.py']
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater', interval=5))
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=1.0e-3,
+))
+
+# codec settings
+# input_size is (320, 448): it matches the 448x320 in the file name and is
+# 4x the heatmap size (80, 112).
+# NOTE(review): re-declaring `codec` only replaces the top-level `codec`
+# key; entries the base files already derived from their own codec (head
+# decoder, pipelines) are not recomputed from this value.
+codec = dict(
+    type='MSRAHeatmap', input_size=(320, 448), heatmap_size=(80, 112), sigma=3)
+
+model = dict(
+    # Pretrained backbone weights are configured via `backbone.init_cfg`
+    # in the base config.
+    backbone=dict(
+        layers=[3, 4, 8, 3],
+        drop_path_rate=0.2),
+    test_cfg=dict())
+
+train_dataloader = dict(batch_size=32)
+val_dataloader = dict(batch_size=256)
diff --git a/projects/uniformer/models/__init__.py b/projects/uniformer/models/__init__.py
@@ -0,0 +1,3 @@
+# Importing this package pulls in everything exported by `.uniformer`;
+# `# noqa` silences the linter warning about the star import.
+from .uniformer import * # noqa
+
+# public API of the package
+# NOTE(review): assumes `.uniformer` defines `Uniformer` -- confirm.
+__all__ = ['Uniformer']