# CUDA_VISIBLE_DEVICES=1 python test_fatezero.py --config config/shape/man_wonder_bat_spider.yaml
pretrained_model_path: "ckpt/man_skate_250"
dataset_config:
path: "data/shape/man_skate"
prompt: "A man rides a wooden skateboard on the rail with a helmet and arms outstretched"
n_sample_frame: 8
sampling_rate: 1
stride: 80
offset:
left: 0
right: 0
top: 0
bottom: 0
editing_config:
use_invertion_latents: true
use_inversion_attention: true
guidance_scale: 7.5
editing_prompts: [
# source prompt
A man rides a wooden skateboard on the rail with a helmet and arms outstretched,
# foreground character replacement
A Wonder Woman rides a wooden skateboard on the rail with cowboy hat and arms outstretched,
A Spider-Man rides a wooden skateboard on the rail and arms outstretched,
A Batman rides a wooden skateboard on the rail and arms outstretched
]
p2p_config:
0:
# Whether to directly copy the cross-attention from the source
# True: directly copy, better for object replacement
# False: keep the source attention, better for style editing
is_replace_controller: False
# Semantic preserving and replacement ratio for cross-attention
cross_replace_steps:
default_: 0.8
# Source background structure preserving, in [0, 1]
# e.g., 0.6 replaces self-attention in the first 60% of steps
self_replace_steps: 0.6
# Amplify the cross-attention of the target words; a larger value moves the result closer to the target
# eq_params:
# words: ["silver", "sculpture"]
# values: [2,2]
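# A hypothetical sketch, not part of the original config: for this edit, eq_params
# could instead amplify the new character token; the word and value below are illustrative only
# eq_params:
#   words: ["Batman"]
#   values: [2]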
# Target structure-divergence hyperparameters
# If you change the shape of the object, it is better to use all three lines below; otherwise they are not needed.
# Without the following three lines, all self-attention will be replaced
# blend_words: [['cat',], ["cat",]]
# blend_self_attention: True
# # blend_latents: False # performance is not good in our case, needs debugging
# blend_th: [2, 2]
# blend_th preserves the source structure of blend_words, in [0, 1]
# the default blend_th: [2, 2] preserves all source self-attention
# blend_th: [0.0, 0.0] drives the mask -> 1, using more target self-attention (att_replace): more generated attention, less source attention
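# Illustrative sketch only (kept disabled for this source-reconstruction entry):
# enabling shape-change blending here would mirror entries 1-3 below, e.g.
# blend_words: [['man',]]
# blend_self_attention: True
# blend_th: [0.3, 0.3]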
1:
is_replace_controller: False
cross_replace_steps:
default_: 0.3
self_replace_steps: 0.3
blend_words: [['man',]]
blend_self_attention: True
# blend_latents: False # performance is not good in our case, needs debugging
blend_th: [0.3, 0.3]
2:
is_replace_controller: False
cross_replace_steps:
default_: 0.3
self_replace_steps: 0.3
blend_words: [['man',]]
blend_self_attention: True
# blend_latents: False # performance is not good in our case, needs debugging
blend_th: [0.3, 0.3]
3:
is_replace_controller: False
cross_replace_steps:
default_: 0.9
self_replace_steps: 0.9
blend_words: [['man',]]
blend_self_attention: True
# blend_latents: False # performance is not good in our case, needs debugging
blend_th: [0.3, 0.3]
clip_length: "${..dataset_config.n_sample_frame}"
sample_seeds: [0]
num_inference_steps: 50
prompt2prompt_edit: True
model_config:
lora: 16
# temporal_downsample_time: 4
# SparseCausalAttention_index: ['mid']
# least_sc_channel: 1280
# least_sc_channel: 100000
test_pipeline_config:
target: video_diffusion.pipelines.p2p_ddim_spatial_temporal.P2pDDIMSpatioTemporalPipeline
num_inference_steps: "${..validation_sample_logger.num_inference_steps}"
seed: 0