# run_sac.py
# Repository forked from pratikaher88/HyperSpectralRL_V2.
import pickle
from rl_trainer_sac import RL_Trainer
# Configuration passed to RL_Trainer. It is organised as one sub-dictionary per
# section: 'agent', 'actor', 'critic', 'policy' and 'data'.
#
# NOTE(review): 'num_bands' (200) is repeated in the agent, actor and critic
# sections, and 'band_selection_num' (30) in the actor and data sections.
# Presumably the copies must stay in sync - confirm against RL_Trainer before
# changing only one of them.

# Section stored under params['agent'].
_agent_settings = {
    'agent_class': 'SAC',
    'n_iter': 2000,
    'trajectory_sample_size': 10,
    'batch_size': 20,
    'num_critic_updates': 10,
    'num_bands': 200,
    'reward_type': 'correlation',
    'num_critic_updates_per_agent_update': 1,
    'num_actor_updates_per_agent_update': 1,
    'critic_target_update_frequency': 1,
    'actor_update_frequency': 1,
    'exp_reward': True,
}

# Section stored under params['actor'].
_actor_settings = {
    'num_bands': 200,
    'band_selection_num': 30,
    'log_std_bounds': [-20, 2],
    'action_range': [-1, 1],
    'init_temperature': 1.0,
    'learning_rate': 0.001,
    'epsilon': 0,  # SAC
    'epsilon_decay': 0.99999,
}

# Section stored under params['critic'].
_critic_settings = {
    'num_grad_steps_per_target_update': 1,
    'num_target_updates': 1,
    'num_bands': 200,
    'gamma': 0.99,
    'learning_rate': 0.001,
    'double_q': False,
}

# Section stored under params['policy'].
_policy_settings = {
    'epsilon': 0.99,
    'epsilon_decay': 0.9999,
}

# Section stored under params['data'].
# NOTE(review): 'data_file_path' is an absolute path on one specific machine;
# it has to be edited before running elsewhere.
_data_settings = {
    'band_selection_num': 30,
    'dataset_type': 'IndianPines',
    'data_file_path': r'/Users/pratikaher/FALL22/HyperSpectralRL/ForPratik/data_indian_pines_drl.mat',
    'sample_ratio': 0.1,
}

# Assemble the sections in the same order as before.
params = {
    'agent': _agent_settings,
    'actor': _actor_settings,
    'critic': _critic_settings,
    'policy': _policy_settings,
    'data': _data_settings,
}
if __name__ == "__main__":
    # Script entry point: build a trainer from the module-level ``params``,
    # run its training loop, then print and finalise the collected logs.

    # Disabled snippet: read a pickled data cache from data/data_cache.pickle.
    # with open('data/data_cache.pickle', 'rb') as handle:
    #     data_cache_loaded = pickle.load(handle)

    rl_trainer = RL_Trainer(params)
    rl_trainer.run_training_loop()

    # Both logging calls go through the trainer's LogManager.
    log_manager = rl_trainer.LogManager
    print(log_manager.logging_df.head())
    log_manager.log_final_data()

    # Disabled snippet: pickle ``agent.cache`` to data_cache.pickle and read it
    # back. NOTE(review): ``agent`` is not defined anywhere in this script as
    # shown, so this cannot be re-enabled verbatim.
    # import pickle
    # data_cache = agent.cache
    # with open('data_cache.pickle', 'wb') as handle:
    #     pickle.dump(data_cache, handle, protocol=pickle.HIGHEST_PROTOCOL)
    # with open('data_cache.pickle', 'rb') as handle:
    #     data_cache_loaded = pickle.load(handle)