-
Notifications
You must be signed in to change notification settings - Fork 1
/
gplearn_optuna.py
68 lines (54 loc) · 2.08 KB
/
gplearn_optuna.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os
import pickle
import joblib
import numpy as np
import optuna
from gplearn.genetic import SymbolicRegressor
class Objective:
    """Callable Optuna objective that fits a gplearn ``SymbolicRegressor``.

    Each call fits a fresh regressor on the stored dataset, pickles the fitted
    estimator to ``outputs/<study name>/<trial number>.pkl`` and returns the
    last entry of the estimator's ``run_details_["best_fitness"]`` list.
    """

    def __init__(self, inputs, outputs):
        """Keep references to the training data reused by every trial.

        Args:
            inputs: Feature data handed to ``SymbolicRegressor.fit`` as ``X``.
            outputs: Target data handed to ``SymbolicRegressor.fit`` as ``y``.
        """
        self.inputs, self.outputs = inputs, outputs

    def __call__(self, trial: optuna.Trial):
        """Fit one regressor for ``trial``, save it, and return its score.

        Args:
            trial: The Optuna trial being evaluated. Only its number and the
                name of its study are read here (to build the output path).

        Returns:
            The final element of ``run_details_["best_fitness"]`` of the
            fitted estimator.
        """
        # NOTE: no hyperparameter is sampled from `trial` at the moment. To
        # tune any of the following, uncomment the relevant lines and forward
        # the values as keyword arguments to SymbolicRegressor below.
        # population_size = trial.suggest_int("population_size", 1000, 2000, step=50)
        # tournament_size = trial.suggest_int("tournament_size", 20, 40, step=5)
        # init_depth_min = trial.suggest_int("init_depth_min", 2, 8)
        # init_depth_max = init_depth_min + trial.suggest_int("init_depth_diff", 0, 6)
        # Fixed alternatives to the sampled values above:
        # population_size = 1000
        # tournament_size = 20
        # init_depth_min = 2
        # init_depth_max = 6
        # parsimony_coefficient = trial.suggest_float("parsimony_coefficient", 0.0001, 0.1)

        # One pickle per trial, grouped in a directory named after the study.
        study_dir = f"outputs/{trial.study.study_name}"
        os.makedirs(study_dir, exist_ok=True)
        checkpoint_path = os.path.join(study_dir, f"{trial.number}.pkl")

        regressor = SymbolicRegressor(
            function_set=("add", "sub", "mul"),
            n_jobs=4,
            verbose=0,
        )
        regressor.fit(self.inputs, self.outputs)

        with open(checkpoint_path, "wb") as handle:
            pickle.dump(regressor, handle)

        fitness_history = regressor.run_details_["best_fitness"]
        return fitness_history[-1]
def main(n_trials=None, timeout=None):
    """Run the Optuna study on a synthetic ``y = x0 + x1 - x2`` dataset.

    Builds three random feature columns, creates (or reloads) the SQLite-backed
    study ``distill-offline-dataset-v1`` under ``outputs/<study name>/``,
    optimizes ``Objective`` on it and finally dumps the study object to
    ``outputs/<study name>/study.pkl`` with joblib.

    Args:
        n_trials: Maximum number of trials, forwarded to ``study.optimize``.
            ``None`` (the default) places no limit on the number of trials.
        timeout: Time budget in seconds, forwarded to ``study.optimize``.
            ``None`` (the default) places no time limit.

    With both arguments left at ``None`` the search only ends when it is
    interrupted (e.g. Ctrl+C) or fails; the study snapshot is still written in
    that case because the dump runs in a ``finally`` clause.
    """
    n_samples = 10024

    # Synthetic regression problem whose exact solution is x0 + x1 - x2.
    x0 = np.random.rand(n_samples, 1)
    x1 = np.random.rand(n_samples, 1)
    x2 = np.random.rand(n_samples, 1)
    y0 = x0 + x1 - x2

    x_train = np.concatenate([x0, x1, x2], 1)
    y_train = y0.squeeze()

    study_name = "distill-offline-dataset-v1"
    directory_name = f"outputs/{study_name}"
    # The SQLite file lives in this directory, so it must exist beforehand.
    os.makedirs(directory_name, exist_ok=True)

    study = optuna.create_study(
        storage=f"sqlite:///{directory_name}/{study_name}.db",
        study_name=study_name,
        direction="minimize",
        load_if_exists=True,
    )

    try:
        study.optimize(
            Objective(x_train, y_train),
            n_trials=n_trials,
            timeout=timeout,
        )
    finally:
        # Previously this dump sat after an unbounded optimize() call and was
        # skipped whenever the run was stopped by an interrupt or an error.
        joblib.dump(study, f"{directory_name}/study.pkl")


if __name__ == "__main__":
    main()