-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
744 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Pose Estimation with UniFormer | ||
|
||
This project implements a top-down, heatmap-based human pose estimator, utilizing the approach outlined in **UniFormer: Unifying Convolution and Self-attention for Visual Recognition** (TPAMI 2023) and **UniFormer: Unified Transformer for Efficient Spatiotemporal Representation Learning** (ICLR 2022). | ||
|
||
## Usage | ||
|
||
## Citation | ||
|
||
If this project benefits your work, please kindly consider citing the original papers: | ||
|
||
```bibtex | ||
@misc{li2022uniformer, | ||
title={UniFormer: Unifying Convolution and Self-attention for Visual Recognition}, | ||
author={Kunchang Li and Yali Wang and Junhao Zhang and Peng Gao and Guanglu Song and Yu Liu and Hongsheng Li and Yu Qiao}, | ||
year={2022}, | ||
eprint={2201.09450}, | ||
archivePrefix={arXiv}, | ||
primaryClass={cs.CV} | ||
} | ||
``` | ||
|
||
```bibtex | ||
@misc{li2022uniformer_video, | ||
title={UniFormer: Unified Transformer for Efficient Spatiotemporal Representation Learning}, | ||
author={Kunchang Li and Yali Wang and Peng Gao and Guanglu Song and Yu Liu and Hongsheng Li and Yu Qiao}, | ||
year={2022}, | ||
eprint={2201.04676}, | ||
archivePrefix={arXiv}, | ||
primaryClass={cs.CV} | ||
} | ||
``` |
133 changes: 133 additions & 0 deletions
133
projects/uniformer/configs/_base_/td-hm_uniformer-b-8xb32-210e_coco-256x192.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# Base config: top-down heatmap pose estimation on COCO with a UniFormer-B
# backbone at 256x192 input resolution. The other UniFormer configs in this
# project inherit from this file.
_base_ = ['mmpose::_base_/default_runtime.py']

# Import the project's model package (provides the ``Uniformer`` backbone).
custom_imports = dict(imports='projects.uniformer.models')

# runtime
train_cfg = dict(max_epochs=210, val_interval=10)

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='Adam',
    lr=5e-4,
))

# learning policy
param_scheduler = [
    dict(
        type='LinearLR', begin=0, end=500, start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=210,
        milestones=[170, 200],
        gamma=0.1,
        by_epoch=True)
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=256)

# hooks
default_hooks = dict(
    checkpoint=dict(save_best='coco/AP', rule='greater', interval=5))

# codec settings
# input_size / heatmap_size are given as (width, height); the heatmap is 1/4
# of the input resolution.
codec = dict(
    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)

# model settings
# NOTE: ``norm_cfg`` is kept as a top-level key for configs that may reference
# it; the MMPose 1.x ``HeatmapHead`` does not take a ``norm_cfg`` argument.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    # The top-level model is the MMPose 1.x top-down estimator; ``Uniformer``
    # is only the backbone and cannot consume ``backbone=``/``head=`` itself.
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        type='Uniformer',
        embed_dim=[64, 128, 320, 512],
        layers=[5, 8, 20, 7],
        head_dim=64,
        drop_path_rate=0.4,
        use_checkpoint=False,
        windows=False,
        hybrid=False,
        # Set ``checkpoint`` to the path/URL of the pretrained UniFormer
        # backbone weights before training.
        init_cfg=dict(type='Pretrained', checkpoint='')),
    head=dict(
        # MMPose 1.x names for the former ``TopdownSimpleHead`` /
        # ``JointMSELoss``; ``extra=dict(final_conv_kernel=1)`` became
        # ``final_layer=dict(kernel_size=1)``.
        type='HeatmapHead',
        in_channels=512,
        out_channels=17,
        final_layer=dict(kernel_size=1),
        loss=dict(type='KeypointMSELoss', use_target_weight=True),
        decoder=codec),
    test_cfg=dict(
        flip_test=True,
        flip_mode='heatmap',
        shift_heatmap=True,
    ))

# base dataset settings
dataset_type = 'CocoDataset'
data_mode = 'topdown'
data_root = 'data/coco/'

# pipelines
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]

val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(
    batch_size=128,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_train2017.json',
        data_prefix=dict(img='train2017/'),
        pipeline=train_pipeline,
    ))
val_dataloader = dict(
    batch_size=256,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_val2017.json',
        # Person detections used as input boxes at evaluation time.
        bbox_file='data/coco/person_detection_results/'
        'COCO_val2017_detections_AP_H_56_person.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# evaluators
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
test_evaluator = val_evaluator
22 changes: 22 additions & 0 deletions
22
projects/uniformer/configs/_base_/td-hm_uniformer-b-8xb32-210e_coco-384x288.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# UniFormer-B on COCO at 384x288 input. Inherits the 256x192 base config and
# overrides the resolution-dependent settings.
_base_ = ['./td-hm_uniformer-b-8xb32-210e_coco-256x192.py']

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='Adam',
    lr=2e-3,
))

# hooks
default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)

# The base config has already expanded its own ``codec`` into the head decoder
# and the data pipelines. Redefining ``codec`` here does not reach those
# copies, so every field derived from it is passed in again below; otherwise
# training would silently stay at 256x192.
model = dict(
    backbone=dict(drop_path_rate=0.4),
    head=dict(decoder=codec),
    # An empty dict merges as a no-op: the base ``test_cfg`` is inherited.
    test_cfg=dict())

# pipelines (same steps as the base config, rebuilt with the new codec)
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]

val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(batch_size=32, dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(batch_size=256, dataset=dict(pipeline=val_pipeline))
test_dataloader = val_dataloader
16 changes: 16 additions & 0 deletions
16
projects/uniformer/configs/_base_/td-hm_uniformer-b-8xb32-210e_coco-448x320.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# UniFormer-B on COCO at 448x320 input. Inherits the 256x192 base config and
# overrides the resolution-dependent settings.
_base_ = ['./td-hm_uniformer-b-8xb32-210e_coco-256x192.py']

# hooks
default_hooks = dict(
    checkpoint=dict(save_best='coco/AP', rule='greater', interval=10))

# codec settings
# (width, height) = (320, 448); the heatmap is 1/4 of the input: (80, 112).
codec = dict(
    type='MSRAHeatmap', input_size=(320, 448), heatmap_size=(80, 112), sigma=3)

# The base config has already expanded its own ``codec`` into the head decoder
# and the data pipelines. Redefining ``codec`` here does not reach those
# copies, so every field derived from it is passed in again below; otherwise
# training would silently stay at 256x192.
model = dict(
    backbone=dict(drop_path_rate=0.55),
    head=dict(decoder=codec),
    # An empty dict merges as a no-op: the base ``test_cfg`` is inherited.
    test_cfg=dict())

# pipelines (same steps as the base config, rebuilt with the new codec)
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]

val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(batch_size=32, dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(batch_size=256, dataset=dict(pipeline=val_pipeline))
test_dataloader = val_dataloader
8 changes: 8 additions & 0 deletions
8
projects/uniformer/configs/td-hm_uniformer-s-8xb32-210e_coco-256x192.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# UniFormer-S on COCO at 256x192 input. Inherits the UniFormer-B base config
# and only shrinks the backbone.
# The base path must carry its ``.py`` extension to be resolved as a config.
_base_ = ['./_base_/td-hm_uniformer-b-8xb32-210e_coco-256x192.py']

model = dict(
    # Pretrained backbone weights are configured through
    # ``backbone.init_cfg`` in the base config.
    backbone=dict(
        layers=[3, 4, 8, 3],
        drop_path_rate=0.2,
    ))
24 changes: 24 additions & 0 deletions
24
projects/uniformer/configs/td-hm_uniformer-s-8xb32-210e_coco-384x288.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# UniFormer-S on COCO at 384x288 input. Inherits the UniFormer-B 384x288
# config and shrinks the backbone.
_base_ = ['./_base_/td-hm_uniformer-b-8xb32-210e_coco-384x288.py']

# hooks
default_hooks = dict(
    checkpoint=dict(save_best='coco/AP', rule='greater', interval=5))

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='Adam',
    lr=2e-3,
))

# codec settings
codec = dict(
    type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)

# Values the base configs derived from their own ``codec`` (head decoder,
# pipelines) are not updated by redefining ``codec`` here, so they are passed
# in again explicitly below.
model = dict(
    backbone=dict(
        layers=[3, 4, 8, 3],
        drop_path_rate=0.2),
    head=dict(decoder=codec),
    # An empty dict merges as a no-op: the base ``test_cfg`` is inherited.
    test_cfg=dict())

# pipelines (same steps as the base config, rebuilt with this codec)
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]

val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(batch_size=32, dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(batch_size=256, dataset=dict(pipeline=val_pipeline))
test_dataloader = val_dataloader
24 changes: 24 additions & 0 deletions
24
projects/uniformer/configs/td-hm_uniformer-s-8xb32-210e_coco-448x320.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# UniFormer-S on COCO at 448x320 input. Inherits the UniFormer-B 448x320
# config and shrinks the backbone.
_base_ = ['./_base_/td-hm_uniformer-b-8xb32-210e_coco-448x320.py']

# hooks
default_hooks = dict(
    checkpoint=dict(save_best='coco/AP', rule='greater', interval=5))

# optimizer
optim_wrapper = dict(optimizer=dict(
    type='Adam',
    lr=1.0e-3,
))

# codec settings
# (width, height) = (320, 448); the heatmap is 1/4 of the input: (80, 112).
codec = dict(
    type='MSRAHeatmap', input_size=(320, 448), heatmap_size=(80, 112), sigma=3)

# Values the base configs derived from their own ``codec`` (head decoder,
# pipelines) are not updated by redefining ``codec`` here, so they are passed
# in again explicitly below.
model = dict(
    backbone=dict(
        layers=[3, 4, 8, 3],
        drop_path_rate=0.2),
    head=dict(decoder=codec),
    # An empty dict merges as a no-op: the base ``test_cfg`` is inherited.
    test_cfg=dict())

# pipelines (same steps as the base config, rebuilt with this codec)
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]

val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

# data loaders
train_dataloader = dict(batch_size=32, dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(batch_size=256, dataset=dict(pipeline=val_pipeline))
test_dataloader = val_dataloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Public API of the UniFormer project's ``models`` package.
# Importing the name explicitly (instead of ``import *``) keeps the namespace
# limited to what ``__all__`` declares; the ``uniformer`` module is still
# executed on import.
from .uniformer import Uniformer

__all__ = ['Uniformer']
Oops, something went wrong.