hparams.py
import os

import nevergrad
import ray
import torch

import muzero


class Hyperparams:
    def __init__(self):
        self.budget = 20
        self.parallel_experiments = 1
        self.lr = nevergrad.p.Log(lower=0.0001, upper=1)
        self.discount = nevergrad.p.Log(lower=0.95, upper=1)
        self.parametrization = nevergrad.p.Dict(lr=self.lr, discount=self.discount)
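        # Note: nevergrad.p.Log samples these ranges log-uniformly, which suits
        # scale-sensitive values such as the learning rate; p.Dict bundles the
        # variables into a single search space.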

    def hyperparameter_search(self, game_name, num_tests=20):
        """
        Search for hyperparameters by launching parallel experiments.

        The search space (parametrization), total budget and number of parallel
        experiments are taken from the instance attributes set in __init__.

        Args:
            game_name (str): Name of the game module; it should match the name
                of a .py file in the "./games" directory.

            num_tests (int): Number of games to average over when evaluating an
                experiment.
        """
print("Budget", self.budget)
optimizer = nevergrad.optimizers.OnePlusOne(
parametrization=self.parametrization, budget=self.budget
)
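        # OnePlusOne is a simple (1+1) evolution strategy; other nevergrad
        # optimizers expose the same ask/tell interface and could be swapped in.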

        running_experiments = []
        best_training = None
        try:
            # Launch initial experiments
            for _ in range(self.parallel_experiments):
                if self.budget > 0:
                    param = optimizer.ask()
                    print(f"Launching new experiment: {param.value}")
                    muz = muzero.MuZero(game_name, param.value)
                    muz.param = param
                    # muz.gpu_config()
                    # muz.cpu_actoring()
                    muz.train(False)
                    running_experiments.append(muz)
                    self.budget -= 1
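
            # Poll the running experiments; once one reaches its configured
            # number of training steps, evaluate it, report the score to the
            # optimizer, and reuse its slot for the next candidate.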
            while self.budget > 0 or any(running_experiments):
                for i, experiment in enumerate(running_experiments):
                    if experiment and experiment.config.training_steps <= ray.get(
                        experiment.shared_storage_worker.get_info.remote(
                            "training_step"
                        )
                    ):
                        experiment.terminate_workers()
                        result = experiment.test(False, num_tests=num_tests)
                        if not best_training or best_training["result"] < result:
                            best_training = {
                                "result": result,
                                "config": experiment.config,
                                "checkpoint": experiment.checkpoint,
                            }
                        print(f"Parameters: {experiment.param.value}")
                        print(f"Result: {result}")
                        optimizer.tell(experiment.param, -result)
                        if self.budget > 0:
                            param = optimizer.ask()
                            print(f"Launching new experiment: {param.value}")
                            muz = muzero.MuZero(game_name, param.value)
                            muz.param = param
                            # muz.gpu_config()
                            # muz.cpu_actoring()
                            muz.train(False)
                            running_experiments[i] = muz
                            self.budget -= 1
                        else:
                            running_experiments[i] = None
        except KeyboardInterrupt:
            for experiment in running_experiments:
                # Shut down the Ray workers of any experiment still running
                if isinstance(experiment, muzero.MuZero):
                    experiment.terminate_workers()

        recommendation = optimizer.provide_recommendation()
        print("Best hyperparameters:")
        print(recommendation.value)
        if best_training:
            # Save the checkpoint of the best run; note that these are not
            # necessarily the weights for the recommended parameters.
            # This may need updating because results_path now lives on the
            # MuZero object.
            os.makedirs(best_training["config"].results_path, exist_ok=True)
            torch.save(
                best_training["checkpoint"],
                os.path.join(best_training["config"].results_path, "model.checkpoint"),
            )
            with open(
                os.path.join(
                    best_training["config"].results_path, "best_parameters.txt"
                ),
                "w",
            ) as text_file:
                text_file.write(str(recommendation.value))
        return recommendation.value
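

# Minimal usage sketch (hypothetical, not part of the original file): assumes a
# game module such as "cartpole" exists in "./games", as in muzero-general.
if __name__ == "__main__":
    hp = Hyperparams()
    best = hp.hyperparameter_search("cartpole", num_tests=20)
    print("Recommended hyperparameters:", best)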