# TODO: next, add acceleration and see the difference; add stiffness too
import os
import multiprocessing as mp
from warnings import simplefilter

import numpy as np
import tensorflow as tf

from all_functions import (
    babbling_fcn,
    inverse_mapping_fcn,
    learn_to_move_2_fcn,
    feat_to_run_attempt_fcn,
)
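
# Pipeline, per stiffness version and per Monte-Carlo run: motor-babble on the
# passive ("air") leg model, fit an inverse map from kinematics to activations,
# learn to move on the matching walking model, then replay the best feature set
# and record traveled distance, consumed energy, and the exploration-run count.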
def learn_to_walk_stiffness_pool(stiffness_version_A):
    """Run the full babble/learn-to-walk Monte-Carlo sweep for one stiffness version."""
    experiment_ID = "experiment_3_pool_G_"
    reward_thresh = 3  # 3:7 3B:5
    mc_run_number = 100
    babbling_time = 3  # minutes of motor babbling
    number_of_refinements = 0  # refinements are disabled below (refinement=False)
    rewards = np.zeros([mc_run_number])
    energies = np.zeros([mc_run_number])
    exploration_run_numbers = np.zeros([mc_run_number])
    MuJoCo_model_name_A = "nmi_leg_w_chassis_air_v{}.xml".format(stiffness_version_A)
    MuJoCo_model_name_A_walk = "nmi_leg_w_chassis_air_v{}_walk.xml".format(stiffness_version_A)
    random_seed = -1
    for mc_counter in range(mc_run_number):
        random_seed += 1
        # train model_A: reseed both NumPy and TensorFlow so each Monte-Carlo
        # run starts from different but reproducible initial conditions
        np.random.seed(random_seed)
        tf.random.set_random_seed(random_seed)
        [babbling_kinematics, babbling_activations] = \
            babbling_fcn(
                MuJoCo_model_name=MuJoCo_model_name_A,
                simulation_minutes=babbling_time,
                kinematics_activations_show=False)
        model_A_babble = inverse_mapping_fcn(
            kinematics=babbling_kinematics,
            activations=babbling_activations,
            log_address="./logs/{}/scalars/stiffness_version{}/babble_A_mc_run{}/".format(
                experiment_ID, stiffness_version_A, mc_counter),
            early_stopping=False)
        cum_kinematics_A_babble = babbling_kinematics
        cum_activations_A_babble = babbling_activations
        # A_A test: learn to walk with model_A on its own walking model
        np.random.seed(random_seed)  # reseed so the exploration phase is reproducible per run
        tf.random.set_random_seed(random_seed)
        [best_reward_so_far, all_rewards, best_features_so_far,
            real_attempt_activations, exploration_run_no] = \
            learn_to_move_2_fcn(
                MuJoCo_model_name=MuJoCo_model_name_A_walk,
                model=model_A_babble,
                cum_kinematics=cum_kinematics_A_babble,
                cum_activations=cum_activations_A_babble,
                reward_thresh=reward_thresh,
                energy_cost_weight=0,
                refinement=False,
                Mj_render=False)
        # replay the best feature set and record the resulting reward and activations
        [rewardA_A, _, _, _, real_attempt_activations] = \
            feat_to_run_attempt_fcn(
                MuJoCo_model_name=MuJoCo_model_name_A_walk,
                features=best_features_so_far,
                model=model_A_babble,
                feat_show=False,
                Mj_render=False)
        # energy proxy: sum of squared muscle activations over the whole attempt
        total_energyA_A = np.square(real_attempt_activations).sum(0).sum(0)
        print("traveled distance: ", rewardA_A)
        print("consumed energy: ", total_energyA_A)
        exploration_run_numbers[mc_counter] = exploration_run_no
        rewards[mc_counter] = rewardA_A
        energies[mc_counter] = total_energyA_A
    os.makedirs("./results/{}".format(experiment_ID), exist_ok=True)
    np.save("./results/{}/exploration_run_numbers_S{}".format(experiment_ID, stiffness_version_A), exploration_run_numbers)
    np.save("./results/{}/rewards_S{}".format(experiment_ID, stiffness_version_A), rewards)
    np.save("./results/{}/energies_S{}".format(experiment_ID, stiffness_version_A), energies)
    return None
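
# A minimal sketch (not part of the original experiment) of how the arrays
# saved above could be loaded back and summarized; the path pattern mirrors
# the np.save calls in learn_to_walk_stiffness_pool, and np.save itself
# appends the ".npy" suffix.
def summarize_results(experiment_ID="experiment_3_pool_G_", stiffness_version=0):
    rewards = np.load("./results/{}/rewards_S{}.npy".format(experiment_ID, stiffness_version))
    energies = np.load("./results/{}/energies_S{}.npy".format(experiment_ID, stiffness_version))
    print("rewards mean/std: ", rewards.mean(), rewards.std())
    print("energies mean/std: ", energies.mean(), energies.std())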
if __name__ == "__main__":
    simplefilter(action='ignore', category=FutureWarning)
    stiffness_versions = 9
    # one worker per stiffness version, spread over all available cores
    pool = mp.Pool(mp.cpu_count())
    pool.map_async(learn_to_walk_stiffness_pool, range(stiffness_versions))
    pool.close()
    pool.join()