# DepthFormer (Swin backbone) config for the DDAD dataset.
# The model definition and default runtime come from the base configs listed
# here; the assignments in this file override or extend them.
_base_ = [
    '../_base_/models/depthformer_swin.py',
    '../_base_/default_runtime.py',
]

# Single switch forwarded as `USEPE` to both the image loader and the
# backbone so the two always agree.
USEPE_FLAG = True
# Depth scaling factor shared by the `Normalize` transform and the model.
depth_scale = 250

# dataset settings
dataset_type = 'DDADDataset'
# NOTE(review): `data_root` is not referenced by the `data` dict in this
# file -- confirm the dataset class resolves its own root directory.
data_root = 'data/DDAD'
# Per-channel normalisation statistics (presumably the ImageNet RGB mean/std
# -- confirm). Unpacked into every `Normalize` step.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True,
)
# Training-time transform chain. Relies on the module-level `USEPE_FLAG`,
# `depth_scale` and `img_norm_cfg` defined earlier in this file.
train_pipeline = [
    dict(type='LoadDDADImageFromFile', USEPE=USEPE_FLAG, USE_DYNAMIC_PE=True),
    dict(type='DDADDepthLoadAnnotations', USE_DYNAMIC_PE=True),
    dict(type='DDADResize', shape=(384, 640), USE_DYNAMIC_PE=True),
    dict(type='Resize', ratio_range=(0.5, 2.0)),
    # ori_h/ori_w repeat the 384x640 size used by DDADResize and RandomCrop.
    dict(
        type='Padding',
        img_padding_value=(0, 0, 0),
        depth_padding_value=255,
        pe_k=True,
        ori_h=384,
        ori_w=640,
    ),
    dict(type='RandomRotate', prob=0.5, degree=2.5),
    # prob=0.0 presumably keeps flipping switched off -- confirm intended.
    dict(type='RandomFlip', prob=0.0),
    dict(type='RandomCrop', crop_size=(384, 640)),
    dict(
        type='ColorAug',
        prob=0.5,
        gamma_range=[0.9, 1.1],
        brightness_range=[0.9, 1.1],
        color_range=[0.9, 1.1],
    ),
    dict(type='Normalize', depth_scale=depth_scale, **img_norm_cfg),
    dict(type='DefaultFormatBundle'),
    dict(
        type='Collect',
        keys=['img', 'depth_gt', 'pe_k_gt', 'height'],
        meta_keys=(
            'filename', 'ori_filename', 'ori_shape',
            'img_shape', 'pad_shape', 'scale_factor',
            'flip', 'flip_direction', 'img_norm_cfg',
        ),
    ),
]
# Evaluation/inference transform chain. Relies on the module-level
# `USEPE_FLAG`, `depth_scale` and `img_norm_cfg` defined earlier in this file.
test_pipeline = [
    dict(type='LoadDDADImageFromFile', USEPE=USEPE_FLAG, USE_DYNAMIC_PE=True),
    # NOTE(review): unlike the training pipeline, this resize passes
    # depth=False and no USE_DYNAMIC_PE -- confirm that is intended.
    dict(type='DDADResize', shape=(384, 640), depth=False),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(384, 640),
        # Single scale, flip disabled.
        flip=False,
        flip_direction='horizontal',
        transforms=[
            dict(type='Normalize', depth_scale=depth_scale, **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(
                type='Collect',
                keys=['img', 'height', 'test'],
                meta_keys=(
                    'filename', 'ori_filename', 'ori_shape',
                    'img_shape', 'pad_shape', 'scale_factor',
                    'flip', 'flip_direction', 'img_norm_cfg',
                ),
            ),
        ],
    ),
]
# Dataloader and dataset splits. Relies on the module-level `dataset_type`,
# `train_pipeline` and `test_pipeline` defined earlier in this file.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train_dataloader=dict(
        shuffle=True,
        drop_last=True,
        persistent_workers=False,
    ),
    # NOTE(review): only the validation loader keeps persistent workers;
    # confirm the asymmetry with train/test is deliberate.
    val_dataloader=dict(
        shuffle=False,
        persistent_workers=True,
    ),
    test_dataloader=dict(
        shuffle=False,
        persistent_workers=False,
    ),
    train=dict(
        type=dataset_type,
        split='splits/ddad_train_split.txt',
        pipeline=train_pipeline,
        min_depth=1e-3,
        max_depth=200,
        # Expands to CAMERA_01, CAMERA_05, CAMERA_06, CAMERA_09.
        cameras=['CAMERA_%02d' % idx for idx in [1, 5, 6, 9]],
    ),
    # val and test read the same split file with the same pipeline.
    val=dict(
        type=dataset_type,
        split='splits/ddad_test_split.txt',
        pipeline=test_pipeline,
        min_depth=1e-3,
        max_depth=200,
        cameras=['CAMERA_%02d' % idx for idx in [1, 5, 6, 9]],
    ),
    test=dict(
        type=dataset_type,
        split='splits/ddad_test_split.txt',
        pipeline=test_pipeline,
        min_depth=1e-3,
        max_depth=200,
        cameras=['CAMERA_%02d' % idx for idx in [1, 5, 6, 9]],
    ),
)
# Model overrides on top of the base model config. Relies on the
# module-level `depth_scale` and `USEPE_FLAG` defined earlier in this file.
model = dict(
    pretrained=None,
    depth_scale=depth_scale,
    # NOTE(review): embed_dims/depths/num_heads look like the Swin-L layout
    # -- confirm against the base model file.
    backbone=dict(
        embed_dims=192,
        depths=[2, 2, 18, 2],
        num_heads=[6, 12, 24, 48],
        window_size=7,
        USEPE=USEPE_FLAG,
    ),
    # The four trailing channel widths equal embed_dims * (1, 2, 4, 8); the
    # leading 64 presumably belongs to an extra stem feature -- confirm.
    neck=dict(
        type='HAHIHeteroNeck',
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=256,
        ),
        in_channels=[64, 192, 384, 768, 1536],
        out_channels=[64, 192, 384, 768, 1536],
        embedding_dim=512,
        scales=[1, 1, 1, 1, 1],
    ),
    pe_mask_neck=dict(
        type='LightPEMASKNeck',
    ),
    dynamic_pe_neck=dict(
        type='DynamicPENeckSOFT',
    ),
    decode_head=dict(
        type='DenseDepthHead',
        act_cfg=dict(type='LeakyReLU', inplace=True),
        # Same channel list as the neck outputs above.
        in_channels=[64, 192, 384, 768, 1536],
        up_sample_channels=[64, 192, 384, 768, 1536],
        channels=64,
        # Same depth range as the dataset entries in `data`.
        min_depth=1e-3,
        max_depth=200,
    ),
)
# schedules
# optimizer
# Peak learning rate, handed to the optimizer as `lr`.
max_lr = 1e-4
optimizer = dict(
    type='AdamW',
    lr=max_lr,
    betas=(0.9, 0.999),
    weight_decay=0.01,
    # Per-parameter overrides: decay_mult=0. removes weight decay for
    # parameters whose names match these keys (MMCV paramwise_cfg convention).
    paramwise_cfg=dict(
        custom_keys={
            'absolute_pos_embed': dict(decay_mult=0.),
            'relative_position_bias_table': dict(decay_mult=0.),
            'norm': dict(decay_mult=0.),
        },
    ),
)
# learning policy
# by_epoch=False was added as an experiment (original note: "test add
# by_epoch false"); it is consistent with the iteration-based runner
# configured in this file.
lr_config = dict(
    policy='CosineAnnealing',
    min_lr_ratio=1e-8,
    by_epoch=False,
)
# Gradient clipping: L2 norm (norm_type=2) capped at 35.
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# runtime settings
# Iteration-based training: 38400 iterations, i.e. 48 checkpoint/evaluation
# intervals of 800 iterations each.
runner = dict(type='IterBasedRunner', max_iters=38400)
checkpoint_config = dict(by_epoch=False, max_keep_ckpts=2, interval=800)
# Evaluation runs on the same 800-iteration cadence as checkpointing.
# save_best='abs_rel' with rule='less' keeps the checkpoint with the lowest
# abs_rel (MMCV evaluation-hook convention).
evaluation = dict(
    by_epoch=False,
    start=0,
    interval=800,
    pre_eval=True,
    rule='less',
    save_best='abs_rel',
    greater_keys=("a1", "a2", "a3"),
    less_keys=("abs_rel", "rmse"),
)
# iter runtime
# `_delete_=True` asks the config loader to discard the `log_config`
# inherited from the base runtime instead of merging into it (MMCV config
# convention), so only the two hooks below are active.
log_config = dict(
    _delete_=True,
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', by_epoch=False),
        dict(type='TensorboardLoggerHook'),
    ],
)