# Propagation.py
import torch
import torch.nn as nn
import torch.nn.functional as F


class LinNet(nn.Module):
    """One deterministic particle: maps a concatenated (observation, action)
    vector to the predicted next observation plus a scalar reward."""

    def __init__(self, action_dim=4, obs_dim=39, reward_dim=1):
        super(LinNet, self).__init__()
        self.in_features = action_dim + obs_dim
        self.out_features = obs_dim + reward_dim
        self.input_layer = nn.Linear(in_features=self.in_features, out_features=128)
        self.hidden1_layer = nn.Linear(in_features=128, out_features=256)
        # in_features must match hidden1_layer's out_features; these layers are later
        # overwritten with weights sampled from the BNN, so the shapes declared here
        # should mirror the BNN architecture.
        self.hidden2_layer = nn.Linear(in_features=256, out_features=128)
        self.output_layer = nn.Linear(in_features=128, out_features=self.out_features)
        # Particles are never trained directly, so gradients are not needed.
        for param in self.parameters():
            param.requires_grad = False

    def forward(self, x):
        x = F.relu(self.input_layer(x))
        x = F.relu(self.hidden1_layer(x))
        x = F.relu(self.hidden2_layer(x))
        return self.output_layer(x)
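

# A minimal shape-check sketch, kept separate from the pipeline: with the default
# dims a particle maps a 43-dim (observation + action) vector to a 40-dim
# (next observation + reward) prediction. The helper name is hypothetical and is
# not called anywhere else.
def _demo_linnet_shapes():
    net = LinNet(action_dim=4, obs_dim=39, reward_dim=1)
    x = torch.randn(43)   # concatenated [observation, action]
    y = net(x)            # [predicted next observation, predicted reward]
    assert y.shape == (40,)
    return y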


class Propagation_net:
    """Ensemble of deterministic particle networks used to propagate imagined trajectories."""

    def __init__(self, num_particles=50, action_dim=4, obs_dim=39):
        self.num_particles = num_particles
        self.action_dim = action_dim
        self.obs_dim = obs_dim
        self.deterministic_nets = []
        for i in range(self.num_particles):
            self.deterministic_nets.append(LinNet(action_dim=action_dim, obs_dim=obs_dim))

    def move_to_gpu(self):
        for net_idx in range(self.num_particles):
            self.deterministic_nets[net_idx] = self.deterministic_nets[net_idx].to('cuda:0')

    def sample_from(self, bnn):
        """Draw an independent weight sample from the BNN for every particle."""
        for net in self.deterministic_nets:
            iterator_layer_bnn = bnn.named_modules()
            _ = next(iterator_layer_bnn)  # the first entry is the BNN module itself; discard it
            for name, layer in iterator_layer_bnn:
                weight, bias = layer.sample_weight(requires_grad=False)
                net.get_submodule(name).weight = nn.Parameter(weight)
                net.get_submodule(name).bias = nn.Parameter(bias)
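
    # Interface assumed from the calls above (bnn.py is not shown here): every named
    # submodule of the BNN exposes sample_weight(requires_grad=False) returning a
    # (weight, bias) pair whose shapes match the particle layer of the same name.
    # Typical use, sketched:
    #   bnn = BNN(action_dim=4, obs_dim=39, reward_dim=1)
    #   ensemble = Propagation_net(num_particles=50)
    #   ensemble.sample_from(bnn)   # each particle now holds one sampled weight set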

    def propagate(self, initial_state, actions, dev='cuda:0'):
        """
        :param initial_state: the initial state, shared by every particle
        :param actions: a flat action sequence from the CEM, shape [1 x (action_dim * horizon)];
                        propagate is called once per member of the CEM population
        :return: estimated return (mean over particles of the per-step average reward)
        """
        X = torch.zeros((self.num_particles, self.obs_dim + self.action_dim), device=dev)
        Y = torch.zeros((self.num_particles, self.obs_dim + 1), device=dev)  # the extra column is the reward
        X[:, :self.obs_dim] = initial_state
        rewards = torch.zeros(self.num_particles, device=dev)
        horizon = actions.shape[0] // self.action_dim
        with torch.no_grad():
            for h in range(horizon):
                # write the h-th action into every particle's input row
                X[:, self.obs_dim:] = actions[h * self.action_dim: (h + 1) * self.action_dim]
                # each particle predicts its own next state and reward
                for row_idx, x in enumerate(X):
                    Y[row_idx] = self.deterministic_nets[row_idx](x)
                X[:, :self.obs_dim] = Y[:, :self.obs_dim]  # update the state
                rewards += Y[:, -1]  # accumulate rewards
        return (rewards / horizon).mean()
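

# Usage sketch: how the ensemble is typically driven from a CEM loop. The helper
# name is hypothetical; it simply mirrors the sequential timing loop in the
# __main__ block below.
def evaluate_action_sequences(prop_net, bnn, init_state, act_sequences, dev='cpu'):
    """Score each candidate action sequence: resample the particle weights from
    the BNN, roll the sequence out, and return one estimated return per candidate."""
    returns = torch.zeros(len(act_sequences))
    for idx, act_seq in enumerate(act_sequences):
        prop_net.sample_from(bnn)
        returns[idx] = prop_net.propagate(init_state, act_seq, dev=dev)
    return returns

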
if __name__ == "__main__":
    import time
    from bnn import BNN
    from cem_optimizer_v2 import CEM_opt
    import numpy as np

    torch.set_default_dtype(torch.float64)
    dev = 'cuda:0'
    num_particles = 50
    cem = CEM_opt(num_particles)
    t = time.time()
    act_sequences = torch.from_numpy(cem.population)
    r = np.zeros(act_sequences.shape[0])
    print(time.time() - t)
    original_model = BNN(action_dim=4, obs_dim=39, reward_dim=1)
    # bnn_path = '/home/dema/PycharmProjects/lifelong_rl/VBLRL_rl_exam/model_stock/world/model_envWorld.pth'
    # original_model.load_state_dict(torch.load(bnn_path, map_location=torch.device('cpu')))
    prop_net = Propagation_net(num_particles)
    init_s = torch.randn(39)
    '''
    # Incredibly slow: I let it run for about 4 minutes without knowing how far it had gotten;
    # it does not work either with or without the deepcopy.
    from multiprocessing import Process
    from copy import deepcopy

    def funct_to_parallelize(prop_net, bnn, init_s, act_seq, rew, idx):
        prop_net.sample_from(bnn)
        r = prop_net.propagate(init_s, act_seq, dev='cpu')
        rew[idx] = r.detach().numpy()

    process = []
    t = time.time()
    for i, act_seq in enumerate(act_sequences):
        p = Process(target=funct_to_parallelize, args=(deepcopy(prop_net), deepcopy(original_model), init_s, act_seq, r, i))
        p.start()
        process.append(p)
    for p in process:
        p.join()
    print("multithread cpu: ", time.time() - t)
    print(r != 0)
    '''
    t = time.time()
    for idx, act_seq in enumerate(act_sequences):
        prop_net.sample_from(original_model)
        r[idx] = prop_net.propagate(init_s, act_seq, dev='cpu')
        # here the CEM would be updated with the collected returns
    print('cpu: ', time.time() - t)
    print(r)
    '''
    init_s = init_s.to(dev)
    original_model = original_model.to(dev)
    prop_net.move_to_gpu()
    t = time.time()
    for act_seq in act_sequences:
        act_seq = act_seq.to(dev)
        # prop_net.sample_from(original_model)
        prop_net.propagate(init_s, act_seq)
    print('gpu: ', time.time() - t)
    '''
# Timing notes:
# 50 particles, 1 action sequence, PropNet, on Colab:
#   cpu: 0.18 s
#   gpu: 0.22 s
# 50 particles, 1 action sequence, PropNet, on my PC:
#   cpu: 0.039 s
#   gpu: 0.38 s
# 50 particles, 1 action sequence, old method (threads):
#   cpu: 0.1349 s
#   gpu: not measured
# Full run (all correct parameters: 500 action sequences, 50 particles):
#   total rollout, cpu, PropNet (my PC): 34 s
#   total rollout, gpu, PropNet (my PC): 37 s
#   total rollout, cpu, fake threads (my PC): 72 s
#   total rollout, cpu, sequential, no re-planning (my PC): 85 s
#   total rollout, cpu, sequential, with re-planning (my PC): ~5 min