Skip to content

Commit

Permalink
Add ReBRAC algorithm (#59)
Browse files Browse the repository at this point in the history
  • Loading branch information
DT6A authored Jul 20, 2023
1 parent cc7f36f commit 42c9793
Show file tree
Hide file tree
Showing 48 changed files with 2,103 additions and 101 deletions.
193 changes: 97 additions & 96 deletions README.md

Large diffs are not rendered by default.

760 changes: 760 additions & 0 deletions algorithms/offline/rebrac.py

Large diffs are not rendered by default.

28 changes: 28 additions & 0 deletions configs/offline/rebrac/antmaze/large_diverse_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ReBRAC run configuration for the `antmaze-large-diverse-v2` dataset
# (see `dataset_name` / `group` below).
# Values of note in this file: actor and critic BC coefficients are both 0.002,
# the critic learning rate (0.00005) is lower than the actor's (0.0003),
# gamma is 0.999 and reward normalization is enabled.
# NOTE(review): key semantics (e.g. `*_bc_coef` as behaviour-cloning penalty
# weights, `eval_every` counted in epochs) are inferred from the key names only;
# confirm against algorithms/offline/rebrac.py.
actor_bc_coef: 0.002
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.002
critic_learning_rate: 0.00005
critic_ln: true
critic_n_hiddens: 3
dataset_name: antmaze-large-diverse-v2
eval_episodes: 100
eval_every: 50
eval_seed: 42
gamma: 0.999
group: rebrac-antmaze-large-diverse-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: true
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/antmaze/large_play_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ReBRAC run configuration for the `antmaze-large-play-v2` dataset
# (see `dataset_name` / `group` below).
# Values of note in this file: actor BC coefficient 0.002, critic BC
# coefficient 0.001, critic learning rate (0.00005) lower than the actor's
# (0.0003), gamma 0.999, reward normalization enabled.
# NOTE(review): key semantics (e.g. `*_bc_coef` as behaviour-cloning penalty
# weights, `eval_every` counted in epochs) are inferred from the key names only;
# confirm against algorithms/offline/rebrac.py.
actor_bc_coef: 0.002
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.001
critic_learning_rate: 0.00005
critic_ln: true
critic_n_hiddens: 3
dataset_name: antmaze-large-play-v2
eval_episodes: 100
eval_every: 50
eval_seed: 42
gamma: 0.999
group: rebrac-antmaze-large-play-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: true
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/antmaze/medium_diverse_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ReBRAC run configuration for the `antmaze-medium-diverse-v2` dataset
# (see `dataset_name` / `group` below).
# Values of note in this file: actor BC coefficient 0.001 while the critic BC
# coefficient is exactly 0.0; critic learning rate (0.00005) lower than the
# actor's (0.0003), gamma 0.999, reward normalization enabled.
# NOTE(review): `critic_bc_coef: 0.0` presumably disables the critic-side
# penalty entirely; confirm this is intentional and how rebrac.py treats zero.
# Other key semantics are likewise inferred from names only.
actor_bc_coef: 0.001
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.0
critic_learning_rate: 0.00005
critic_ln: true
critic_n_hiddens: 3
dataset_name: antmaze-medium-diverse-v2
eval_episodes: 100
eval_every: 50
eval_seed: 42
gamma: 0.999
group: rebrac-antmaze-medium-diverse-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: true
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/antmaze/medium_play_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ReBRAC run configuration for the `antmaze-medium-play-v2` dataset
# (see `dataset_name` / `group` below).
# Values of note in this file: actor BC coefficient 0.001, critic BC
# coefficient 0.0005, critic learning rate (0.00005) lower than the actor's
# (0.0003), gamma 0.999, reward normalization enabled.
# NOTE(review): key semantics (e.g. `*_bc_coef` as behaviour-cloning penalty
# weights, `eval_every` counted in epochs) are inferred from the key names only;
# confirm against algorithms/offline/rebrac.py.
actor_bc_coef: 0.001
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.0005
critic_learning_rate: 0.00005
critic_ln: true
critic_n_hiddens: 3
dataset_name: antmaze-medium-play-v2
eval_episodes: 100
eval_every: 50
eval_seed: 42
gamma: 0.999
group: rebrac-antmaze-medium-play-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: true
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/antmaze/umaze_diverse_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ReBRAC run configuration for the `antmaze-umaze-diverse-v2` dataset
# (see `dataset_name` / `group` below).
# Values of note in this file: actor BC coefficient 0.003, critic BC
# coefficient 0.001, critic learning rate (0.00005) lower than the actor's
# (0.0003), gamma 0.999, reward normalization enabled.
# NOTE(review): key semantics (e.g. `*_bc_coef` as behaviour-cloning penalty
# weights, `eval_every` counted in epochs) are inferred from the key names only;
# confirm against algorithms/offline/rebrac.py.
actor_bc_coef: 0.003
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.001
critic_learning_rate: 0.00005
critic_ln: true
critic_n_hiddens: 3
dataset_name: antmaze-umaze-diverse-v2
eval_episodes: 100
eval_every: 50
eval_seed: 42
gamma: 0.999
group: rebrac-antmaze-umaze-diverse-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: true
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/antmaze/umaze_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ReBRAC run configuration for the `antmaze-umaze-v2` dataset
# (see `dataset_name` / `group` below).
# Values of note in this file: actor BC coefficient 0.003, critic BC
# coefficient 0.002, critic learning rate (0.00005) lower than the actor's
# (0.0003), gamma 0.999, reward normalization enabled.
# NOTE(review): key semantics (e.g. `*_bc_coef` as behaviour-cloning penalty
# weights, `eval_every` counted in epochs) are inferred from the key names only;
# confirm against algorithms/offline/rebrac.py.
actor_bc_coef: 0.003
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.002
critic_learning_rate: 0.00005
critic_ln: true
critic_n_hiddens: 3
dataset_name: antmaze-umaze-v2
eval_episodes: 100
eval_every: 50
eval_seed: 42
gamma: 0.999
group: rebrac-antmaze-umaze-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: true
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/door/cloned_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ReBRAC run configuration for the `door-cloned-v1` dataset
# (see `dataset_name` / `group` below).
# Values of note in this file: actor BC coefficient 0.01, critic BC
# coefficient 0.1, actor and critic share a 0.0003 learning rate, gamma 0.99,
# reward normalization disabled, 10 evaluation episodes with `eval_every: 5`.
# NOTE(review): key semantics (e.g. `*_bc_coef` as behaviour-cloning penalty
# weights, `eval_every` counted in epochs) are inferred from the key names only;
# confirm against algorithms/offline/rebrac.py.
actor_bc_coef: 0.01
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.1
critic_learning_rate: 0.0003
critic_ln: true
critic_n_hiddens: 3
dataset_name: door-cloned-v1
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: rebrac-door-cloned-v1
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: false
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/door/expert_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ReBRAC run configuration for the `door-expert-v1` dataset
# (see `dataset_name` / `group` below).
# Values of note in this file: actor BC coefficient 0.05, critic BC
# coefficient 0.01, actor and critic share a 0.0003 learning rate, gamma 0.99,
# reward normalization disabled, 10 evaluation episodes with `eval_every: 5`.
# NOTE(review): key semantics (e.g. `*_bc_coef` as behaviour-cloning penalty
# weights, `eval_every` counted in epochs) are inferred from the key names only;
# confirm against algorithms/offline/rebrac.py.
actor_bc_coef: 0.05
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.01
critic_learning_rate: 0.0003
critic_ln: true
critic_n_hiddens: 3
dataset_name: door-expert-v1
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: rebrac-door-expert-v1
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: false
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/door/human_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ReBRAC run configuration for the `door-human-v1` dataset
# (see `dataset_name` / `group` below).
# Values of note in this file: actor and critic BC coefficients are both 0.1,
# actor and critic share a 0.0003 learning rate, gamma 0.99, reward
# normalization disabled, 10 evaluation episodes with `eval_every: 5`.
# NOTE(review): key semantics (e.g. `*_bc_coef` as behaviour-cloning penalty
# weights, `eval_every` counted in epochs) are inferred from the key names only;
# confirm against algorithms/offline/rebrac.py.
actor_bc_coef: 0.1
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
critic_bc_coef: 0.1
critic_learning_rate: 0.0003
critic_ln: true
critic_n_hiddens: 3
dataset_name: door-human-v1
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: rebrac-door-human-v1
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: false
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/halfcheetah/expert_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ReBRAC run configuration for the `halfcheetah-expert-v2` dataset
# (see `dataset_name` / `group` below).
# Values of note in this file: actor and critic BC coefficients are both 0.01,
# batch size 1024, actor and critic share a 0.001 learning rate, gamma 0.99,
# reward normalization disabled, 10 evaluation episodes with `eval_every: 5`.
# NOTE(review): key semantics (e.g. `*_bc_coef` as behaviour-cloning penalty
# weights, `eval_every` counted in epochs) are inferred from the key names only;
# confirm against algorithms/offline/rebrac.py.
actor_bc_coef: 0.01
actor_learning_rate: 0.001
actor_ln: false
actor_n_hiddens: 3
batch_size: 1024
critic_bc_coef: 0.01
critic_learning_rate: 0.001
critic_ln: true
critic_n_hiddens: 3
dataset_name: halfcheetah-expert-v2
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: rebrac-halfcheetah-expert-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: false
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/halfcheetah/full_replay_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ReBRAC run configuration for the `halfcheetah-full-replay-v2` dataset
# (see `dataset_name` / `group` below).
# Values of note in this file: actor BC coefficient 0.001, critic BC
# coefficient 0.1, batch size 1024, actor and critic share a 0.001 learning
# rate, gamma 0.99, reward normalization disabled, 10 evaluation episodes with
# `eval_every: 5`.
# NOTE(review): key semantics (e.g. `*_bc_coef` as behaviour-cloning penalty
# weights, `eval_every` counted in epochs) are inferred from the key names only;
# confirm against algorithms/offline/rebrac.py.
actor_bc_coef: 0.001
actor_learning_rate: 0.001
actor_ln: false
actor_n_hiddens: 3
batch_size: 1024
critic_bc_coef: 0.1
critic_learning_rate: 0.001
critic_ln: true
critic_n_hiddens: 3
dataset_name: halfcheetah-full-replay-v2
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: rebrac-halfcheetah-full-replay-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: false
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/halfcheetah/medium_expert_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ReBRAC run configuration for the `halfcheetah-medium-expert-v2` dataset
# (see `dataset_name` / `group` below).
# Values of note in this file: actor BC coefficient 0.01, critic BC
# coefficient 0.1, batch size 1024, actor and critic share a 0.001 learning
# rate, gamma 0.99, reward normalization disabled, 10 evaluation episodes with
# `eval_every: 5`.
# NOTE(review): key semantics (e.g. `*_bc_coef` as behaviour-cloning penalty
# weights, `eval_every` counted in epochs) are inferred from the key names only;
# confirm against algorithms/offline/rebrac.py.
actor_bc_coef: 0.01
actor_learning_rate: 0.001
actor_ln: false
actor_n_hiddens: 3
batch_size: 1024
critic_bc_coef: 0.1
critic_learning_rate: 0.001
critic_ln: true
critic_n_hiddens: 3
dataset_name: halfcheetah-medium-expert-v2
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: rebrac-halfcheetah-medium-expert-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: false
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
28 changes: 28 additions & 0 deletions configs/offline/rebrac/halfcheetah/medium_replay_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ReBRAC run configuration for the `halfcheetah-medium-replay-v2` dataset
# (see `dataset_name` / `group` below).
# Values of note in this file: actor BC coefficient 0.01, critic BC
# coefficient 0.001, batch size 1024, actor and critic share a 0.001 learning
# rate, gamma 0.99, reward normalization disabled, 10 evaluation episodes with
# `eval_every: 5`.
# NOTE(review): key semantics (e.g. `*_bc_coef` as behaviour-cloning penalty
# weights, `eval_every` counted in epochs) are inferred from the key names only;
# confirm against algorithms/offline/rebrac.py.
actor_bc_coef: 0.01
actor_learning_rate: 0.001
actor_ln: false
actor_n_hiddens: 3
batch_size: 1024
critic_bc_coef: 0.001
critic_learning_rate: 0.001
critic_ln: true
critic_n_hiddens: 3
dataset_name: halfcheetah-medium-replay-v2
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: rebrac-halfcheetah-medium-replay-v2
hidden_dim: 256
name: rebrac
noise_clip: 0.5
normalize_q: true
normalize_reward: false
normalize_states: false
num_epochs: 1000
num_updates_on_epoch: 1000
policy_freq: 2
policy_noise: 0.2
project: ReBRAC
tau: 0.005
train_seed: 0
Loading

0 comments on commit 42c9793

Please sign in to comment.