-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrun_human_labeller.sh
37 lines (27 loc) · 1.16 KB
/
run_human_labeller.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/bin/bash
#######################################
# Launch one training run with the actual human labeller for a given seed.
# Globals:
#   gpu_ids (read) - array mapping seed -> CUDA device index
# Arguments:
#   $1 - seed; must have an entry in gpu_ids
#   $2 - optional algorithm, 'pebble' (default) or 'rime', case-insensitive
# Returns:
#   2 when the seed is missing or the algorithm is unknown,
#   1 when the seed has no GPU entry,
#   otherwise the exit status of the python process.
#######################################
run_command() {
  # All settings are local so they do not leak into (or clobber) the
  # caller's variables, e.g. the caller's own `seed` loop variable.
  local algorithm="${2:-pebble}" # [pebble, rime]
  local unsup_steps=19000
  local envname="Hopper_v3"
  local sac_lr=0.0005
  local num_interact=20000
  local feedback=100
  local reward_batch=10
  local seed="${1:-}"

  if [[ -z "$seed" ]]; then
    printf 'run_command: missing seed argument\n' >&2
    return 2
  fi

  # Without this check an unmapped seed would silently yield the device
  # string "cuda:" and only fail later inside the training script.
  if [[ -z "${gpu_ids[$seed]:-}" ]]; then
    printf 'run_command: no GPU id configured for seed %s\n' "$seed" >&2
    return 1
  fi
  local device="cuda:${gpu_ids[$seed]}"

  # Flags passed identically (and in this order) to both training scripts.
  local -a common_args=(
    --env="$envname"
    --seed="$seed"
    --actor_lr="$sac_lr"
    --critic_lr="$sac_lr"
    --unsup_steps="$unsup_steps"
    --steps=10000000
    --num_interact="$num_interact"
    --max_feedback="$feedback"
    --reward_batch="$reward_batch"
    --reward_update=50
  )

  case "${algorithm,,}" in
    pebble)
      python train_PEBBLE_with_actual_human_labeller.py \
        --device="$device" "${common_args[@]}"
      ;;
    rime)
      python train_RIME_with_actual_human_labeller.py \
        "${common_args[@]}" --device="$device" \
        --least_reward_update=30 --threshold_variance='kl' \
        --threshold_alpha=0.5 --threshold_beta_init=3.0 \
        --threshold_beta_min=1.0
      ;;
    *)
      # Reject typos instead of silently falling through to RIME.
      printf 'run_command: unknown algorithm %s (expected pebble or rime)\n' \
        "$algorithm" >&2
      return 2
      ;;
  esac
}
# Seed -> CUDA device index lookup table read by run_command.
declare -A gpu_ids
gpu_ids[12345]=0

# Seeds to train with; runs are launched one after another, in list order.
seeds=(12345)
seed_idx=0
while ((seed_idx < ${#seeds[@]})); do
  seed="${seeds[seed_idx]}"
  run_command "$seed"
  seed_idx=$((seed_idx + 1))
done

# Returns immediately when there are no background jobs to reap.
wait