# config_dqn.yml (forked from catalyst-team/catalyst)

args:
  logdir: ./logs/rl-gym-dqn  # change me
  expdir: null
  vis: 0
  infer: 0  # change me
  valid: 1  # change me
  train: 1  # change me
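
  # Assumption, not verified against the catalyst-rl source: the vis / infer /
  # valid / train counters above set how many sampler processes of each kind
  # are launched alongside the trainer, so this config would run one training
  # and one validation sampler and no inference or visualization samplers.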

db:
  db: RedisDB
  port: 12000
  prefix: gym-dqn  # TODO: remove
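
# A note on the block above (an assumption from how catalyst-rl is usually run,
# not something stated in this file): samplers and the trainer live in separate
# processes and use the Redis instance on port 12000 as the shared exchange for
# collected transitions and network weights; the prefix keeps several
# experiments from colliding inside one database.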

environment:
  environment: GymEnvWrapper
  env_name: LunarLander-v2
  history_len: &history_len 3
  frame_skip: 2
  reward_scale: 1.0
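
# Reading of the parameters above (inferred from their names): history_len
# stacks the 3 most recent observations into the agent's input, frame_skip
# repeats every chosen action for 2 consecutive environment steps, and
# reward_scale multiplies raw rewards before they are stored.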

agents:
  critic:
    agent: ActionCritic

    state_net_params:  # state -> hidden representation
      observation_net_params:
        _network_type: "linear"
        history_len: *history_len
        features: [128]  # the first layer's input size is taken from state_shape
        use_bias: false
        normalization: LayerNorm
        activation: ReLU
      main_net_params:
        features: [128, 128]
        use_bias: false
        normalization: LayerNorm
        activation: ReLU
    value_head_params:  # hidden representation -> Q-values (~policy)
      in_features: 128  # output size is taken from action_shape
      # distribution: categorical
      # num_atoms: 51
      # values_range: [-300.0, 300.0]
      # distribution: quantile
      # num_atoms: 51
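
    # A sketch of the critic these settings describe, pieced together from the
    # comments above rather than from the catalyst code: the stacked
    # observations go through a 128-unit linear block, then the 128 -> 128
    # main net, then the value head maps 128 features to one Q-value per
    # discrete action. Un-commenting `distribution: categorical` (51 atoms
    # over [-300, 300]) would switch the head to a C51-style categorical
    # output, and `distribution: quantile` to a QR-DQN-style quantile output.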

algorithm:
  algorithm: DQN

  n_step: 1
  gamma: 0.99
  critic_tau: 1.0

  critic_loss_params:
    criterion: HuberLoss
    clip_delta: 10.0

  critic_optimizer_params:
    optimizer: Adam
    lr: 0.0003
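
# How the block above reads (inferred from the parameter names, not checked
# against the implementation): with n_step: 1 and gamma: 0.99 the critic is
# regressed onto the standard DQN target y = r + 0.99 * max_a Q_target(s', a);
# a larger n_step would substitute the discounted n-step return. critic_tau: 1.0
# makes each target-network update a full hard copy rather than a Polyak
# average, and HuberLoss with clip_delta: 10.0 switches from quadratic to
# linear error beyond a TD error of 10.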

trainer:
  batch_size: 256              # transitions
  num_workers: 1
  epoch_len: 500               # batches, 128k samples
  replay_buffer_size: 1000000  # transitions
  min_num_transitions: 5000    # transitions
  save_period: 100             # epochs
  weights_sync_period: 1       # epochs
  target_update_period: 250    # batches, 64k updates
  max_updates_per_sample: 32
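
# Throughput arithmetic for the settings above: 500 batches of 256 transitions
# means 128,000 sampled transitions per epoch, and the target network is
# refreshed every 250 batches, i.e. every 64,000 transitions drawn from the
# buffer. max_updates_per_sample: 32 appears to cap how often any stored
# transition may be replayed, throttling the trainer when samplers fall behind
# (the exact semantics is an assumption from the name).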

sampler:
  weights_sync_period: 1

  exploration_params:
    - exploration: Boltzmann
      probability: 0.5
      temp_init: 1.0
      temp_final: 0.5
      annealing_steps: 10000

    - exploration: EpsilonGreedy
      probability: 0.45
      eps_init: 1.0
      eps_final: 0.5
      annealing_steps: 10000

    - exploration: Greedy
      probability: 0.05
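
  # The three strategies above form a mixture: 0.5 + 0.45 + 0.05 = 1.0, so each
  # rollout presumably draws one of them at random. Boltzmann anneals its
  # softmax temperature from 1.0 to 0.5 and EpsilonGreedy anneals epsilon from
  # 1.0 to 0.5, both over 10,000 steps, while the Greedy 5% always picks the
  # argmax action.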

  valid_seeds: [
    1608637542,
    1273642419,
    1935803228,
    787846414,
    996406378,
    1201263687,
    423734972,
    415968276,
    670094950,
    1914837113,
    669991378,
    429389014,
    249467210,
    1972458954,
    1572714583,
    1433267572,
    434285667,
    613608295,
    893664919,
    648061058,
    88409749,
    242285876,
    2018247425,
    953477463,
    1427830251,
    1883569565,
    911989541,
    3344769,
    780932287,
    2114032571,
    787716372,
    504579232,
    1306710475,
    479546681,
    106328085,
    30349564,
    1855189739,
    99052376,
    1250819632,
    106406362,
    480404538,
    1717389822,
    599121577,
    200427519,
    1254751707,
    2034764475,
    1573512143,
    999745294,
    1958805693,
    389151677,
    1224821422,
    508464061,
    857592370,
    1642661739,
    61136438,
    2075460851,
    396917567,
    2004731384,
    199502978,
    1545932260,
    461901618,
    774414982,
    732395540,
    1934879560,
    279394470,
    56972561,
    1927948675,
    1899242072,
    1999874363,
    271820813,
    1324556529,
    1655351289,
    1308306184,
    68574553,
    419498548,
    991681409,
    791274835,
    1035196507,
    1890440558,
    787110843,
    524150214,
    472432043,
    2126768636,
    1431061255,
    147697582,
    744595490,
    1758017741,
    1679592528,
    1111451555,
    782698033,
    698027879,
    1096768899,
    1338788865,
    1826030589,
    86191493,
    893102645,
    200619113,
    290770691,
    793943861,
    134489564
  ]
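
# The list above pins 100 environment seeds, presumably so that validation
# episodes are always scored on the same fixed set of initial conditions.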