# main_4cube_env.py
# (GitHub page chrome and the spilled line-number gutter from the scraped
# copy were removed; the runnable source begins below.)
import square_env
from curious_agent import CuriousAgent
import curious_agent as cru
import gym
import numpy as np
import time
import square_env.envs as sqv
import matplotlib.pyplot as plt
from matplotlib import style
import threading
import random
from draw_plots import draw_plots, plot_together, plot_field, draw_color_maps
from activate_agent import activate_agent
from copy import deepcopy
from random_agent import RandomAgent
from neural_network import NeuralNetwork
import datetime
import stats
from moving_cube import MovingCube
import pickle as pkl
# Experiment hyper-parameters.
NUM_OF_EPOCHES = 100  # epochs for the curious agent's initial training run
PRINT_STATE_PRED = 50  # presumably a print/log interval — unused in this file, TODO confirm
PRINT_TIME_STEP = 500  # presumably a print/log interval — unused in this file, TODO confirm
MAX_STEPS = 5000  # steps per error-measurement run (both curious and random agents)
NUMBER_OF_AGENTS = 5  # pushed into the environment via sqv.set_global("AGENTS_COUNT", ...)
def add_avg_dict(src, d, i):
    """Fold sample dict *d* into running-average dict *src* as the i-th sample.

    On the first call (``i == 0``) *src* is seeded from *d*, converting list
    values to numpy arrays.  On later calls every float ndarray entry of
    *src* is replaced by the running mean over the ``i + 1`` samples seen so
    far; non-float entries keep their first-sample value.
    """
    if i == 0:
        for key, value in d.items():
            src[key] = np.array(value) if isinstance(value, list) else value
        return
    for key in src:
        current = src[key]
        if not (isinstance(current, np.ndarray) and current.dtype == 'float'):
            continue
        # Incremental mean: new = (i * old + sample) / (i + 1)
        src[key] = (float(i) * current + np.array(d[key])) / float(i + 1)
def get_agent_dict(all_agents_dict, index=0):
    """Return the per-agent slice at *index* from a dict of per-key sequences."""
    return {key: values[index] for key, values in all_agents_dict.items()}
def join_dict_list(lst):
    """Average a list of result dicts key-wise into a single dict.

    List values become numpy arrays; float arrays are averaged over all
    dicts in *lst*, while every other value is taken verbatim from the
    first dict.
    """
    size = len(lst)
    joined = {}
    for key, value in lst[0].items():
        if not isinstance(value, list):
            joined[key] = value
            continue
        arr = np.array(value)
        if arr.dtype == 'float':
            arr = arr / size
        joined[key] = arr
    # Accumulate the remaining dicts' float entries into the running mean.
    for other in lst[1:]:
        for key in other:
            target = joined[key]
            if isinstance(target, np.ndarray) and target.dtype == 'float':
                joined[key] = target + np.array(other[key]) / size
    return joined
def main():
    """Run the 4-cube curiosity experiment and plot/pickle the results.

    Repeats 10 runs: trains a CuriousAgent, measures its total errors, then
    measures a RandomAgent sharing the same initial learner network.  The
    per-run results are averaged, plotted (color maps, TDs, std, means,
    derivative), saved as PNGs under ./plots/, and pickled to 4cubes_data.pkl.
    """
    agent_dict = []   # per-run result dicts for the curious agent
    random_dict = []  # per-run result dicts for the random baseline
    random_agent = RandomAgent(0)
    # NOTE(review): wall1/wall2 are never used afterwards — presumably the
    # MovingCube constructor registers the cubes with the env; confirm.
    wall1, wall2 = MovingCube(1), MovingCube(2)
    print('began running at %s' % datetime.datetime.now().strftime("%a, %d %B %Y %H:%M:%S"))
    color_map_agent = []
    sqv.set_global("AGENTS_COUNT", NUMBER_OF_AGENTS)
    for i in range(10):
        # Fresh learner network for this run; both agents start from a copy
        # of the same weights so the comparison is fair.
        learner = NeuralNetwork(cru.AGENT_LEARNER_NETWORK_SHAPE, cru.linear_relu, min=-0.01, max=0.01)
        curious_agent = CuriousAgent(0)
        curious_agent.learner = deepcopy(learner)
        # Training phase: NUM_OF_EPOCHES epochs of 1000 steps, no error tracking.
        d = activate_agent(1000, number_of_epoches=NUM_OF_EPOCHES, render=False, print_info=False, reset_env=False,
                           agents=[curious_agent], get_avg_errors=False,
                           get_values_field=True, number_of_error_agents=1)
        # Reset the learner to the same initial weights before measuring errors.
        curious_agent.learner = deepcopy(learner)
        d1 = activate_agent(MAX_STEPS, number_of_epoches=1, render=False, print_info=False, reset_env=False,
                            agents=[curious_agent], get_avg_errors=True,
                            get_values_field=True, number_of_error_agents=1)
        # Keep the training-run dict but substitute the measured error curve.
        d['total_errors'] = d1['total_errors']
        agent_dict.append(get_agent_dict(d))
        color_map_agent.append(stats.get_color_map(curious_agent))
        # Baseline: random agent with the same (shared, not copied) learner.
        random_agent.learner = learner
        d = activate_agent(MAX_STEPS, render=False, print_info=False, reset_env=False, agents=[random_agent], get_avg_errors=True,
                           number_of_error_agents=1)
        random_dict.append(get_agent_dict(d))
        print('finished running #%i at %s' % (i + 1, datetime.datetime.now().strftime("%a, %d %B %Y %H:%M:%S")))
    # Per-run error curves and their std across runs (curious agent).
    means_curious = []
    for i in agent_dict:
        means_curious.append(i['total_errors'])
    std_agent = np.array(means_curious).std(axis=0)
    # Average the per-run color maps and draw them.
    color_map_agent = np.array(color_map_agent).mean(axis=0)
    figs, axes = draw_color_maps(color_map_agent)
    # Same std computation for the random baseline.
    means_random = []
    for i in random_dict:
        means_random.append(i['total_errors'])
    std_random = np.array(means_random).std(axis=0)
    # Collapse the per-run dict lists into averaged dicts.
    agent_dict = join_dict_list(agent_dict)
    #draw_plots(agent_dict)
    random_dict = join_dict_list(random_dict)
    #draw_plots(random_dict)
    #fig, ax ,q = plot_field(*agent_dict['fields'], title='Agent Value Field', color=agent_dict['fields_colors'])
    errors_rate_curious = agent_dict['total_errors']
    errors_rate_random = random_dict['total_errors']
    # Last TD value of each epoch for the curious agent.
    last_td_agent = np.zeros((len(agent_dict['epoches_tds']),))
    for i, v in enumerate(agent_dict['epoches_tds']):
        last_td_agent[i] = v[-1]
    fig0, ax0 = plot_together(np.arange(len(last_td_agent)),[last_td_agent, {'label':'curious', 'color':'blue'}], title='Epochs Last TD',
                              axis_labels=['epoch', 'last TD'])
    fig1, ax1 = plot_together(random_dict['timesteps'], [errors_rate_curious, {'label':'curious', 'color':'blue'}],
                              [errors_rate_random, {'label':'random', 'color':'red'}], title='Total Errors STD',
                              std=[std_agent, std_random], axis_labels=['steps', 'total error'])
    fig2, ax2 = plot_together(random_dict['timesteps'], [errors_rate_curious, {'label': 'curious', 'color': 'blue'}],
                              [errors_rate_random, {'label': 'random', 'color': 'red'}], title='Total Errors Means',
                              means=[means_curious, means_random], axis_labels=['steps', 'total error'])
    # Derivative plot has one fewer point, hence the [:-1] on the x-axis.
    fig3, ax3 = plot_together(random_dict['timesteps'][:-1], [stats.derivative(errors_rate_curious), {'label': 'curious', 'color': 'blue'}],
                              [stats.derivative(errors_rate_random), {'label': 'random', 'color': 'red'}], title='Total Errors Derivative',
                              axis_labels=['steps', 'total error'])
    # Persist figures; ./plots/ must already exist — TODO confirm.
    figs.savefig('./plots/colormap.png')
    fig0.savefig('./plots/tds.png')
    fig1.savefig('./plots/std.png', dpi=300)
    fig2.savefig('./plots/means.png', dpi=300)
    fig3.savefig('./plots/deriv.png')
    # Pickle the raw results (curious_agent is the last run's agent).
    with open('4cubes_data.pkl', 'wb') as f:
        pkl.dump([last_td_agent, errors_rate_curious, errors_rate_random, color_map_agent, curious_agent], f)
    plt.show()
if __name__ == "__main__":
    # main()
    # Load previously pickled experiment results.  The original opened the
    # file without closing it; the context manager fixes that leak.
    # NOTE(review): pickle.load executes arbitrary code — only load trusted files.
    with open('4cubes_data.pkl', 'rb') as f:
        ca = pkl.load(f)
    print()