6_run_AIRL_data.py
import argparse
import os
import warnings

import numpy as np

os.environ["CUDA_VISIBLE_DEVICES"] = '3'  # expose only GPU index 3; set before TensorFlow initializes CUDA

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

from AIRL.AIRL_base_data import AIRL_base
from shared.argparser import argparser
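
# This driver loads one subject's expert trajectory (actions.npy and
# rewards.npy), fills in the AIRL hyperparameters, and calls AIRL_base
# once per simulation.
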
def run_AIRL(args, sub_id, base_name):
    args.gpu_fraction = 1  # alternative: 0.19
    args.n_feature = 11

    ## hyperparameters for loading the trajectory data
    args.envs_base = 'Highway'  # load data from this directory
    args.expert_traj_dir = 'traj_from_data_v2/' + sub_id
    print("load trajectory from", args.expert_traj_dir)
    expert_actions = np.load(args.expert_traj_dir + '/actions.npy')
    args.full_traj_length = len(expert_actions)  # length of the trajectory data
    print("loaded trajectory size", len(expert_actions))
    reward_data = np.load(args.expert_traj_dir + '/rewards.npy')
    expert_reward = np.mean(reward_data)  # mean expert reward, passed to AIRL_base
    data_length = len(expert_actions) - (len(expert_actions) % 100)  # round down to a multiple of 100 for parallel sampling
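    # e.g. 12,345 recorded steps -> data_length = 12,345 - 45 = 12,300,
    # so the samples divide evenly across the parallel workers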
    if args.restrict_sample:
        data_length = args.sample_size
    print(data_length, "original samples used")
    ## hyperparameters that determine the size of the trajectory actually used in the AIRL
    args.traj_length = data_length  # number of steps in the expert trajectory
    args.split = 5
    if args.cv:
        args.cv_length = int(args.traj_length / args.fold)  # size of the held-out fold
        print("cv length", args.cv_length, "cv number", args.sim_number)
        base_length = int(args.traj_length) - args.cv_length
    else:
        base_length = int(args.traj_length)
    print(base_length, "samples used")
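    # e.g. with args.fold = 4 and traj_length = 10,000: cv_length = 2,500 is
    # held out and base_length = 7,500 is used for training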
    args.min_length = base_length  # agent sample size
    args.num_expert_dimension = base_length  # expert sample size
    args.batch_size_discrim = int(base_length / args.split)  # size of the training batch for the discriminator model
    args.random_starting = False
    args.num_parallel_sampler = 2  # number of parallel workers
    ## learning rates
    args.lr_policy = 5e-4  # default = 1e-4
    args.lr_value = 5e-4  # default = 1e-4
    args.lr_discrim = 5e-4 / args.split  # default = 1e-4
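    # the discriminator sees args.split batches per iteration (see
    # batch_size_discrim above), so its learning rate is divided by the same
    # factor, presumably to keep the effective step size comparable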
    ## numbers of epochs
    args.num_epoch_policy = 4  # default = 6
    args.num_epoch_value = 4  # default = 10

    ## network sizes
    args.units_p = [512] * 3  # policy network: three hidden layers of 512 units
    args.units_v = [512] * 3  # value network: three hidden layers of 512 units
    args.units_d = 512  # discriminator hidden units

    args.iteration = 1001  # number of IRL iterations
    num_sim = 1  # number of simulations
    args.discretize = False
    args.boost_action = True  # this was True in the state-action-pair setup
    args.polish_action = False
    for iSim in range(num_sim):
        # args.envs_1 = "Highway_" + sub_id + "_take" + str(starting_number + iSim)
        args.envs_1 = "Highway_" + base_name + "_take" + str(args.sim_number + iSim)
        AIRL_base(args, expert_reward)
if __name__ == '__main__':
    sim_list = [0]  # [1, 2, 3, ..., nfold] for cross validation; any numbering for non-cv runs
    for iSim in range(len(sim_list)):
        sub_list = np.arange(1) + 303  # subject list
        print(sub_list)
        for iSub in range(len(sub_list)):
            args = argparser()
            warnings.filterwarnings("ignore")
            args.state_only = False
            args.prior_model = False
            args.model_restore = 'trained_models_AIRL/Highway_sub999_35qb120_boost_take2/1500model.ckpt'
            sub_id = 'sub' + str(sub_list[iSub])  # e.g. 'sub315'
            base_name = sub_id + "_dqn"  # alternative suffix: "_35qb120_boost"
            args.config = 'config_35_quick120.npy'
            # args.optim = 'Adam'  # Adam or SGD (default: Adam)
            args.cv = False  # cross validation
            args.fold = 4
            args.restrict_sample = False  # use a subset of the samples
            args.sample_size = 10000  # number of samples to use (if restrict_sample)
            args.sim_number = sim_list[iSim]
            print("cv_number", args.sim_number)
            run_AIRL(args, sub_id, base_name)
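
# To run cross validation instead (a sketch based on the flags above): set
# args.cv = True and enumerate the folds in sim_list, e.g.
#     sim_list = [1, 2, 3, 4]
#     args.cv = True
# With args.fold = 4, each pass then holds out one traj_length / 4 slice of
# the data; AIRL_base presumably selects the held-out fold from args.sim_number.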