forked from open-mmlab/mmdetection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
grounding_dino_swin-t_finetune_8xb4_50e_people_in_painting.py
109 lines (100 loc) · 3.73 KB
/
grounding_dino_swin-t_finetune_8xb4_50e_people_in_painting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
_base_ = '../grounding_dino_swin-t_pretrain_obj365.py'
# https://universe.roboflow.com/roboflow-100/people-in-paintings/dataset/2
data_root = 'data/people_in_painting_v2/'
class_name = ('Human', )
palette = [(220, 20, 60)]
metainfo = dict(classes=class_name, palette=palette)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='RandomFlip', prob=0.5),
dict(
type='RandomChoice',
transforms=[
[
dict(
type='RandomChoiceResize',
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
(736, 1333), (768, 1333), (800, 1333)],
keep_ratio=True)
],
[
dict(
type='RandomChoiceResize',
# The radio of all image in train dataset < 7
# follow the original implement
scales=[(400, 4200), (500, 4200), (600, 4200)],
keep_ratio=True),
dict(
type='RandomCrop',
crop_type='absolute_range',
crop_size=(384, 600),
allow_negative_crop=True),
dict(
type='RandomChoiceResize',
scales=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
(608, 1333), (640, 1333), (672, 1333), (704, 1333),
(736, 1333), (768, 1333), (800, 1333)],
keep_ratio=True)
]
]),
dict(
type='PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor', 'flip', 'flip_direction', 'text',
'custom_entities'))
]
train_dataloader = dict(
sampler=dict(_delete_=True, type='DefaultSampler', shuffle=True),
batch_sampler=dict(type='AspectRatioBatchSampler'),
dataset=dict(
_delete_=True,
type='RepeatDataset',
times=10,
dataset=dict(
type='CocoDataset',
data_root=data_root,
metainfo=metainfo,
filter_cfg=dict(filter_empty_gt=False, min_size=32),
pipeline=train_pipeline,
return_classes=True,
data_prefix=dict(img='train/'),
ann_file='train/_annotations.coco.json')))
val_dataloader = dict(
dataset=dict(
metainfo=metainfo,
data_root=data_root,
return_classes=True,
ann_file='valid/_annotations.coco.json',
data_prefix=dict(img='valid/')))
test_dataloader = val_dataloader
val_evaluator = dict(
type='CocoMetric',
ann_file=data_root + 'valid/_annotations.coco.json',
metric='bbox',
format_only=False)
test_evaluator = val_evaluator
optim_wrapper = dict(
_delete_=True,
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0001),
clip_grad=dict(max_norm=0.1, norm_type=2),
paramwise_cfg=dict(custom_keys={
'absolute_pos_embed': dict(decay_mult=0.),
'backbone': dict(lr_mult=0.1)
}))
# learning policy
max_epochs = 5
param_scheduler = [
dict(
type='MultiStepLR',
begin=0,
end=max_epochs,
by_epoch=True,
milestones=[4],
gamma=0.1)
]
train_cfg = dict(max_epochs=max_epochs, val_interval=1)
default_hooks = dict(checkpoint=dict(max_keep_ckpts=1, save_best='auto'))
load_from = 'https://download.openmmlab.com/mmdetection/v3.0/mm_grounding_dino/grounding_dino_swin-t_pretrain_obj365_goldg_grit9m_v3det/grounding_dino_swin-t_pretrain_obj365_goldg_grit9m_v3det_20231204_095047-b448804b.pth' # noqa