# Multiply train.batch_size by ${ft_denoising_steps} in the avoid_m1, avoid_m2
# and avoid_m3 ft_ppo_diffusion_mlp.yaml fine-tuning configs.
# NOTE(review): presumably each fine-tuned denoising step contributes its own
# sample to the update, so the batch must grow by that factor - confirm against
# the training agent. Assumes `ft_denoising_steps` is defined at the top level
# of each config (outside the hunks shown here) - TODO confirm.
diff --git a/cfg/d3il/finetune/avoid_m1/ft_ppo_diffusion_mlp.yaml b/cfg/d3il/finetune/avoid_m1/ft_ppo_diffusion_mlp.yaml
index 380cb8d..a8db0fc 100644
--- a/cfg/d3il/finetune/avoid_m1/ft_ppo_diffusion_mlp.yaml
+++ b/cfg/d3il/finetune/avoid_m1/ft_ppo_diffusion_mlp.yaml
@@ -76,7 +76,7 @@ train:
   reward_scale_running: True
   reward_scale_const: 1.0
   gae_lambda: 0.95
-  batch_size: ${eval:'round(${train.n_steps} * ${env.n_envs} / 2)'}
+  batch_size: ${eval:'round(${train.n_steps} * ${env.n_envs} * ${ft_denoising_steps} / 2)'}
   update_epochs: 10
   vf_coef: 0.5
   target_kl: 1
diff --git a/cfg/d3il/finetune/avoid_m2/ft_ppo_diffusion_mlp.yaml b/cfg/d3il/finetune/avoid_m2/ft_ppo_diffusion_mlp.yaml
index e6e70f0..4396afb 100644
--- a/cfg/d3il/finetune/avoid_m2/ft_ppo_diffusion_mlp.yaml
+++ b/cfg/d3il/finetune/avoid_m2/ft_ppo_diffusion_mlp.yaml
@@ -76,7 +76,7 @@ train:
   reward_scale_running: True
   reward_scale_const: 1.0
   gae_lambda: 0.95
-  batch_size: ${eval:'round(${train.n_steps} * ${env.n_envs} / 2)'}
+  batch_size: ${eval:'round(${train.n_steps} * ${env.n_envs} * ${ft_denoising_steps} / 2)'}
   update_epochs: 10
   vf_coef: 0.5
   target_kl: 1
diff --git a/cfg/d3il/finetune/avoid_m3/ft_ppo_diffusion_mlp.yaml b/cfg/d3il/finetune/avoid_m3/ft_ppo_diffusion_mlp.yaml
index ccdfd2e..8f030b4 100644
--- a/cfg/d3il/finetune/avoid_m3/ft_ppo_diffusion_mlp.yaml
+++ b/cfg/d3il/finetune/avoid_m3/ft_ppo_diffusion_mlp.yaml
@@ -76,7 +76,7 @@ train:
   reward_scale_running: True
   reward_scale_const: 1.0
   gae_lambda: 0.95
-  batch_size: ${eval:'round(${train.n_steps} * ${env.n_envs} / 2)'}
+  batch_size: ${eval:'round(${train.n_steps} * ${env.n_envs} * ${ft_denoising_steps} / 2)'}
   update_epochs: 10
   vf_coef: 0.5
   target_kl: 1