Skip to content

Commit d0cf7a4

Browse files
Ma-Zhuang authored and RobinhoodKi committed
Adapt to ddad datasets
1 parent c0e37b3 commit d0cf7a4

24 files changed

+124588
-157
lines changed

.gitignore

-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ __pycache__/
77

88
# Distribution / packaging
99
vidar
10-
*.txt
1110
log_vis/
1211
point-cloud*/
1312
*.ply

README.md

+7
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,13 @@ DepthFormer is used here as the baseline to exemplify the improvement by the pro
3131
| GEDepth-Vanilla | 0.049 | 0.144 | 2.061 | [[Google Drive]](https://drive.google.com/drive/folders/1XQRl7AtSBBIPoXtZOh87M_LG0iAJPDl_?usp=sharing) [[Baidu Cloud]](https://pan.baidu.com/s/1bzFoxx_uFrcmx3uUNc03HQ?pwd=rjt1) |
3232
| GEDepth-Adaptive | 0.048 | 0.142 | 2.044 | [[Google Drive]](https://drive.google.com/drive/folders/1XQRl7AtSBBIPoXtZOh87M_LG0iAJPDl_?usp=sharing) [[Baidu Cloud]](https://pan.baidu.com/s/1bzFoxx_uFrcmx3uUNc03HQ?pwd=rjt1) |
3333

34+
* DDAD
35+
36+
| Model | Abs Rel | Sq Rel | RMSE | Checkpoint |
37+
| ------| -----| ------- | ------ | -------------|
38+
|Baseline | 0.152| 2.230| 11.051| [[Link]](https://github.com/zhyever/Monocular-Depth-Estimation-Toolbox) |
39+
| GEDepth-Vanilla | 0.149 | 2.121 | 10.790 | [[Google Drive]](https://drive.google.com/drive/folders/1XQRl7AtSBBIPoXtZOh87M_LG0iAJPDl_?usp=sharing) [[Baidu Cloud]](https://pan.baidu.com/s/1bzFoxx_uFrcmx3uUNc03HQ?pwd=rjt1) |
40+
| GEDepth-Adaptive | 0.145 | 2.119 | 10.596 | [[Google Drive]](https://drive.google.com/drive/folders/1XQRl7AtSBBIPoXtZOh87M_LG0iAJPDl_?usp=sharing) [[Baidu Cloud]](https://pan.baidu.com/s/1bzFoxx_uFrcmx3uUNc03HQ?pwd=rjt1) |
3441

3542
## Citation
3643
Please cite the following paper if this repo helps your research:

configs/depthformer/depthformer_a.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
img_dir='input',
6161
ann_dir='gt_depth',
6262
depth_scale=256,
63-
split='kitti_eigen_train.txt',
63+
split='splits/kitti_eigen_train.txt',
6464
pipeline=train_pipeline,
6565
garg_crop=True,
6666
eigen_crop=False,
@@ -72,7 +72,7 @@
7272
img_dir='input',
7373
ann_dir='gt_depth',
7474
depth_scale=256,
75-
split='kitti_eigen_test.txt',
75+
split='splits/kitti_eigen_test.txt',
7676
pipeline=test_pipeline,
7777
garg_crop=True,
7878
eigen_crop=False,
@@ -84,7 +84,7 @@
8484
img_dir='input',
8585
ann_dir='gt_depth',
8686
depth_scale=256,
87-
split='kitti_eigen_test.txt',
87+
split='splits/kitti_eigen_test.txt',
8888
pipeline=test_pipeline,
8989
garg_crop=True,
9090
eigen_crop=False,
+158
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
_base_ = [
2+
'../_base_/models/depthformer_swin.py',
3+
'../_base_/default_runtime.py'
4+
]
5+
USEPE_FLAG = True
6+
depth_scale = 250
7+
# dataset settings
8+
dataset_type = 'DDADDataset'
9+
data_root = 'data/DDAD'
10+
img_norm_cfg = dict(
11+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
12+
train_pipeline = [
13+
dict(type='LoadDDADImageFromFile',USEPE=USEPE_FLAG,USE_DYNAMIC_PE=True),
14+
dict(type='DDADDepthLoadAnnotations',USE_DYNAMIC_PE=True),
15+
dict(type='DDADResize',shape=(384, 640),USE_DYNAMIC_PE=True),
16+
dict(type='Resize',ratio_range=(0.5,2.0)),
17+
dict(type='Padding',img_padding_value=(0,0,0),depth_padding_value=255,pe_k=True,ori_h=384,ori_w=640),
18+
dict(type='RandomRotate', prob=0.5, degree=2.5),
19+
dict(type='RandomFlip', prob=0.0),
20+
dict(type='RandomCrop', crop_size=(384, 640)),
21+
dict(type='ColorAug', prob=0.5, gamma_range=[0.9, 1.1], brightness_range=[0.9, 1.1], color_range=[0.9, 1.1]),
22+
dict(type='Normalize', depth_scale = depth_scale, **img_norm_cfg),
23+
dict(type='DefaultFormatBundle'),
24+
dict(type='Collect',
25+
keys=['img', 'depth_gt','pe_k_gt','height'],
26+
meta_keys=('filename', 'ori_filename', 'ori_shape',
27+
'img_shape', 'pad_shape', 'scale_factor',
28+
'flip', 'flip_direction', 'img_norm_cfg',)),
29+
]
30+
test_pipeline = [
31+
dict(type='LoadDDADImageFromFile',USEPE=USEPE_FLAG,USE_DYNAMIC_PE=True),
32+
dict(type='DDADResize',shape=(384, 640),depth=False),
33+
dict(
34+
type='MultiScaleFlipAug',
35+
img_scale=(384, 640),
36+
flip=False,
37+
flip_direction='horizontal',
38+
transforms=[
39+
dict(type='Normalize', depth_scale=depth_scale,**img_norm_cfg),
40+
dict(type='ImageToTensor', keys=['img']),
41+
dict(type='Collect',
42+
keys=['img','height','test'],
43+
meta_keys=('filename', 'ori_filename', 'ori_shape',
44+
'img_shape', 'pad_shape', 'scale_factor',
45+
'flip', 'flip_direction', 'img_norm_cfg',)),
46+
])
47+
]
48+
data = dict(
49+
samples_per_gpu=4,
50+
workers_per_gpu=4,
51+
train_dataloader=dict(
52+
shuffle=True,
53+
drop_last=True,
54+
persistent_workers=False),
55+
val_dataloader=dict(
56+
shuffle=False,
57+
persistent_workers=True),
58+
test_dataloader=dict(
59+
shuffle=False,
60+
persistent_workers=False),
61+
train=dict(
62+
type=dataset_type,
63+
split='splits/ddad_train_split.txt',
64+
pipeline=train_pipeline,
65+
min_depth=1e-3,
66+
max_depth=200,
67+
cameras = ['CAMERA_%02d' % idx for idx in [1,5,6,9]],
68+
),
69+
val=dict(
70+
type=dataset_type,
71+
split='splits/ddad_test_split.txt',
72+
pipeline=test_pipeline,
73+
min_depth=1e-3,
74+
max_depth=200,
75+
cameras = ['CAMERA_%02d' % idx for idx in [1,5,6,9]],
76+
),
77+
test=dict(
78+
type=dataset_type,
79+
split='splits/ddad_test_split.txt',
80+
pipeline=test_pipeline,
81+
min_depth=1e-3,
82+
max_depth=200,
83+
cameras = ['CAMERA_%02d' % idx for idx in [1,5,6,9]],
84+
))
85+
86+
87+
88+
model = dict(
89+
pretrained=None,
90+
depth_scale = depth_scale,
91+
backbone=dict(
92+
embed_dims=192,
93+
depths=[2, 2, 18, 2],
94+
num_heads=[6, 12, 24, 48],
95+
window_size=7,
96+
USEPE = USEPE_FLAG),
97+
neck=dict(
98+
type='HAHIHeteroNeck',
99+
positional_encoding=dict(
100+
type='SinePositionalEncoding', num_feats=256),
101+
in_channels=[64, 192, 384, 768, 1536],
102+
out_channels=[64, 192, 384, 768, 1536],
103+
embedding_dim=512,
104+
scales=[1, 1, 1, 1, 1]),
105+
pe_mask_neck=dict(
106+
type='LightPEMASKNeck'
107+
),
108+
dynamic_pe_neck=dict(
109+
type='DynamicPENeckSOFT'
110+
),
111+
decode_head=dict(
112+
type='DenseDepthHead',
113+
act_cfg=dict(type='LeakyReLU', inplace=True),
114+
in_channels=[64, 192, 384, 768, 1536],
115+
up_sample_channels=[64, 192, 384, 768, 1536],
116+
channels=64,
117+
min_depth=1e-3,
118+
max_depth=200,
119+
))
120+
# schedules
121+
# optimizer
122+
max_lr=1e-4
123+
optimizer = dict(
124+
type='AdamW',
125+
lr=max_lr,
126+
betas=(0.9, 0.999),
127+
weight_decay=0.01,
128+
paramwise_cfg=dict(
129+
custom_keys={
130+
'absolute_pos_embed': dict(decay_mult=0.),
131+
'relative_position_bias_table': dict(decay_mult=0.),
132+
'norm': dict(decay_mult=0.),
133+
}))
134+
# learning policy
135+
lr_config = dict(
136+
policy='CosineAnnealing',
137+
min_lr_ratio=1e-8,
138+
by_epoch=False) # by_epoch=False: iteration-based schedule, paired with the IterBasedRunner configured below
139+
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
140+
# runtime settings
141+
runner = dict(type='IterBasedRunner', max_iters=38400)
142+
checkpoint_config = dict(by_epoch=False, max_keep_ckpts=2, interval=800)
143+
evaluation = dict(by_epoch=False,
144+
start=0,
145+
interval=800,
146+
pre_eval=True,
147+
rule='less',
148+
save_best='abs_rel',
149+
greater_keys=("a1", "a2", "a3"),
150+
less_keys=("abs_rel", "rmse"))
151+
# iter runtime
152+
log_config = dict(
153+
_delete_=True,
154+
interval=50,
155+
hooks=[
156+
dict(type='TextLoggerHook', by_epoch=False),
157+
dict(type='TensorboardLoggerHook')
158+
])

configs/depthformer/depthformer_v.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
img_dir='input',
6161
ann_dir='gt_depth',
6262
depth_scale=256,
63-
split='kitti_eigen_train.txt',
63+
split='splits/kitti_eigen_train.txt',
6464
pipeline=train_pipeline,
6565
garg_crop=True,
6666
eigen_crop=False,
@@ -72,7 +72,7 @@
7272
img_dir='input',
7373
ann_dir='gt_depth',
7474
depth_scale=256,
75-
split='kitti_eigen_test.txt',
75+
split='splits/kitti_eigen_test.txt',
7676
pipeline=test_pipeline,
7777
garg_crop=True,
7878
eigen_crop=False,
@@ -84,7 +84,7 @@
8484
img_dir='input',
8585
ann_dir='gt_depth',
8686
depth_scale=256,
87-
split='kitti_eigen_test.txt',
87+
split='splits/kitti_eigen_test.txt',
8888
pipeline=test_pipeline,
8989
garg_crop=True,
9090
eigen_crop=False,
+155
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
_base_ = [
2+
'../_base_/models/depthformer_swin.py',
3+
'../_base_/default_runtime.py'
4+
]
5+
USEPE_FLAG = True
6+
depth_scale = 250
7+
# dataset settings
8+
dataset_type = 'DDADDataset'
9+
data_root = 'data/DDAD'
10+
img_norm_cfg = dict(
11+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
12+
train_pipeline = [
13+
dict(type='LoadDDADImageFromFile',USEPE=USEPE_FLAG,USE_DYNAMIC_PE=True),
14+
dict(type='DDADDepthLoadAnnotations',USE_DYNAMIC_PE=True),
15+
dict(type='DDADResize',shape=(384, 640),USE_DYNAMIC_PE=True),
16+
dict(type='Resize',ratio_range=(0.5,2.0)),
17+
dict(type='Padding',img_padding_value=(0,0,0),depth_padding_value=255,pe_k=True,ori_h=384,ori_w=640),
18+
dict(type='RandomRotate', prob=0.5, degree=2.5),
19+
dict(type='RandomFlip', prob=0.0),
20+
dict(type='RandomCrop', crop_size=(384, 640)),
21+
dict(type='ColorAug', prob=0.5, gamma_range=[0.9, 1.1], brightness_range=[0.9, 1.1], color_range=[0.9, 1.1]),
22+
dict(type='Normalize', depth_scale = depth_scale, **img_norm_cfg),
23+
dict(type='DefaultFormatBundle'),
24+
dict(type='Collect',
25+
keys=['img', 'depth_gt','pe_k_gt','height'],
26+
meta_keys=('filename', 'ori_filename', 'ori_shape',
27+
'img_shape', 'pad_shape', 'scale_factor',
28+
'flip', 'flip_direction', 'img_norm_cfg',)),
29+
]
30+
test_pipeline = [
31+
dict(type='LoadDDADImageFromFile',USEPE=USEPE_FLAG,USE_DYNAMIC_PE=True),
32+
dict(type='DDADResize',shape=(384, 640),depth=False),
33+
dict(
34+
type='MultiScaleFlipAug',
35+
img_scale=(384, 640),
36+
flip=False,
37+
flip_direction='horizontal',
38+
transforms=[
39+
dict(type='Normalize', depth_scale=depth_scale,**img_norm_cfg),
40+
dict(type='ImageToTensor', keys=['img']),
41+
dict(type='Collect',
42+
keys=['img','height','test'],
43+
meta_keys=('filename', 'ori_filename', 'ori_shape',
44+
'img_shape', 'pad_shape', 'scale_factor',
45+
'flip', 'flip_direction', 'img_norm_cfg',)),
46+
])
47+
]
48+
data = dict(
49+
samples_per_gpu=4,
50+
workers_per_gpu=4,
51+
train_dataloader=dict(
52+
shuffle=True,
53+
drop_last=True,
54+
persistent_workers=False),
55+
val_dataloader=dict(
56+
shuffle=False,
57+
persistent_workers=True),
58+
test_dataloader=dict(
59+
shuffle=False,
60+
persistent_workers=False),
61+
train=dict(
62+
type=dataset_type,
63+
split='splits/ddad_train_split.txt',
64+
pipeline=train_pipeline,
65+
min_depth=1e-3,
66+
max_depth=200,
67+
cameras = ['CAMERA_%02d' % idx for idx in [1,5,6,9]],
68+
),
69+
val=dict(
70+
type=dataset_type,
71+
split='splits/ddad_test_split.txt',
72+
pipeline=test_pipeline,
73+
min_depth=1e-3,
74+
max_depth=200,
75+
cameras = ['CAMERA_%02d' % idx for idx in [1,5,6,9]],
76+
),
77+
test=dict(
78+
type=dataset_type,
79+
split='splits/ddad_test_split.txt',
80+
pipeline=test_pipeline,
81+
min_depth=1e-3,
82+
max_depth=200,
83+
cameras = ['CAMERA_%02d' % idx for idx in [1,5,6,9]],
84+
))
85+
86+
87+
88+
model = dict(
89+
pretrained=None,
90+
depth_scale = depth_scale,
91+
backbone=dict(
92+
embed_dims=192,
93+
depths=[2, 2, 18, 2],
94+
num_heads=[6, 12, 24, 48],
95+
window_size=7,
96+
USEPE = USEPE_FLAG),
97+
neck=dict(
98+
type='HAHIHeteroNeck',
99+
positional_encoding=dict(
100+
type='SinePositionalEncoding', num_feats=256),
101+
in_channels=[64, 192, 384, 768, 1536],
102+
out_channels=[64, 192, 384, 768, 1536],
103+
embedding_dim=512,
104+
scales=[1, 1, 1, 1, 1]),
105+
pe_mask_neck=dict(
106+
type='LightPEMASKNeck'
107+
),
108+
decode_head=dict(
109+
type='DenseDepthHead',
110+
act_cfg=dict(type='LeakyReLU', inplace=True),
111+
in_channels=[64, 192, 384, 768, 1536],
112+
up_sample_channels=[64, 192, 384, 768, 1536],
113+
channels=64,
114+
min_depth=1e-3,
115+
max_depth=200,
116+
))
117+
# schedules
118+
# optimizer
119+
max_lr=1e-4
120+
optimizer = dict(
121+
type='AdamW',
122+
lr=max_lr,
123+
betas=(0.9, 0.999),
124+
weight_decay=0.01,
125+
paramwise_cfg=dict(
126+
custom_keys={
127+
'absolute_pos_embed': dict(decay_mult=0.),
128+
'relative_position_bias_table': dict(decay_mult=0.),
129+
'norm': dict(decay_mult=0.),
130+
}))
131+
# learning policy
132+
lr_config = dict(
133+
policy='CosineAnnealing',
134+
min_lr_ratio=1e-8,
135+
by_epoch=False) # by_epoch=False: iteration-based schedule, paired with the IterBasedRunner configured below
136+
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
137+
# runtime settings
138+
runner = dict(type='IterBasedRunner', max_iters=38400)
139+
checkpoint_config = dict(by_epoch=False, max_keep_ckpts=2, interval=800)
140+
evaluation = dict(by_epoch=False,
141+
start=0,
142+
interval=800,
143+
pre_eval=True,
144+
rule='less',
145+
save_best='abs_rel',
146+
greater_keys=("a1", "a2", "a3"),
147+
less_keys=("abs_rel", "rmse"))
148+
# iter runtime
149+
log_config = dict(
150+
_delete_=True,
151+
interval=50,
152+
hooks=[
153+
dict(type='TextLoggerHook', by_epoch=False),
154+
dict(type='TensorboardLoggerHook')
155+
])

0 commit comments

Comments
 (0)