# ftr_envs/tasks/crossing/agents/ftr_algo_sac_cfg.yaml

# Root of the agent configuration tree: algorithm choice, checkpoint loading,
# and the `config` / `learn` sub-sections.
# NOTE(review): the ${...} expressions look like OmegaConf-style
# interpolations, and `if` / `resolve_default` appear to be custom resolvers
# that the loading code must register - confirm against the loader.
params:
  # Refers to the `seed` entry of the sibling `config` mapping.
  seed: ${.config.seed}

  # Name of the algorithm this file configures.
  algo:
    name: sac

  # Both entries below are derived from `config.checkpoint`.
  # Presumably `load_checkpoint` becomes True only when a checkpoint path is
  # set - this depends on the `if` resolver's semantics; confirm.
  load_checkpoint: ${if:${.config.checkpoint},True,False} # flag which sets whether to load the checkpoint
  load_path: ${.config.checkpoint} # path to the checkpoint to load

  # Base values that the interpolations in `params` and `learn` point at
  # (seed, name, experiment, device, checkpoint, test).
  config:
    seed: 40
    name: ftr_algo_sac
    # Empty by default; passed together with `name` to `resolve_default` when
    # building `learn.full_experiment_name`.
    experiment: ""
    device: cuda:0
    # NOTE(review): no interpolation visible in this file references this key;
    # `learn.max_iterations` is set to a literal value independently - confirm
    # how (or whether) this entry is consumed.
    max_iterations: ""
    # Empty by default; referenced by `load_checkpoint` and `load_path`.
    checkpoint: ""
    test: False

  # Settings for the learning run: identity/logging, iteration budget,
  # network size, and optimisation hyperparameters.
  learn:
    # Identity and device entries are taken from the sibling `config` mapping.
    agent_name: ${..config.name}
    # Prefixed with `logs/`; the suffix is chosen by the `resolve_default`
    # resolver from `config.name` and `config.experiment`.
    full_experiment_name: logs/${resolve_default:${..config.name},${..config.experiment}}
    # `device` and `device_name` both point at the same `config.device` value.
    device: ${..config.device}
    device_name: ${..config.device}

    test: ${..config.test}

    resume: 0
    save_interval: 500 # check for potential saves every this many iterations
    print_log: True

    # rollout params
    max_iterations: 3000

    # training params
    # NOTE(review): presumably the width and depth of the network's hidden
    # layers - confirm against the model-building code.
    hidden_nodes: 512
    hidden_layer: 3

    # NOTE(review): cliprange / nsteps / noptepochs / nminibatches look like
    # on-policy (PPO-style) settings - confirm the SAC trainer actually reads
    # them before tuning.
    cliprange: 0.2
    nsteps: 8
    noptepochs: 1
    nminibatches: 4 # this is per agent
    replay_size: 10000
    polyak: 0.99
    learning_rate: 0.0003
    max_grad_norm: 1
    ent_coef: 0.2
    reward_scale: 1
    batch_size: 32
    # Disabled option, kept for reference:
    # optim_stepsize: 1.e-3 # 3e-4 is default for single agent training with constant schedule
    # Disabled options, kept for reference:
    # schedule: adaptive # could be adaptive or linear or fixed
    # desired_kl: 0.016
    gamma: 0.99
    # Disabled options, kept for reference:
    # lam: 0.95
    # init_noise_std: 0.8

    log_interval: 1
    asymmetric: False