import argparse
from datetime import datetime
parser = argparse.ArgumentParser(description='AutoGRL')
parser.add_argument('--dataset', type=str, default='cora')
parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument('--iterations', type=int, default=3, help='GBDT iteration rounds')
parser.add_argument('--n', type=int, default=500, help='number of initial archs')
parser.add_argument('--m', type=int, default=10000, help='number of archs to predict in each round')
parser.add_argument('--k', type=int, default=500, help='number of top archs to evaluate in each round')
parser.add_argument('--p', type=int, default=5, help='pruning features with lowest p shap values')
parser.add_argument('--k_test', type=int, default=10, help='number of archs that will be evaluated on test set')
parser.add_argument('--gbdt_lr', type=float, default=0.05, help='GBDT argument')
args = parser.parse_args()
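
# Seed every source of randomness before the remaining imports so runs are
# reproducible; cudnn.deterministic below trades some speed for determinism.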
import random
random.seed(args.seed)
import numpy as np
np.random.seed(args.seed)
import torch
torch.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
torch.backends.cudnn.deterministic = True
from search_space import pruning_search_space_by_eda, pruning_search_space_by_shap
from data_prepare import load_data
from utils import Sampler
from utils import TransductiveTrainer, InductiveTrainer
import pandas as pd
import lightgbm as lgb
import pickle
from catboost import CatBoostRegressor, Pool
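

# Search procedure: evaluate args.n randomly sampled architectures, then for
# each of args.iterations rounds fit a GBDT (CatBoost) on all evaluated
# (arch, val_score) pairs, prune the args.p features with the lowest SHAP
# values from the search space, rank args.m fresh samples with the predictor,
# evaluate the top args.k on the validation set, and report the top
# args.k_test on the test set.
#
# Typical invocation (defaults shown in the parser above):
#   python main.py --dataset cora --seed 0 --iterations 3
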
def main(args):
    # build search space
    data = load_data(args.dataset, args.seed)
    ss, _ = pruning_search_space_by_eda(data)

    if data.setting == 'inductive':
        trainer = InductiveTrainer()
    else:
        trainer = TransductiveTrainer()

    sampler = Sampler(args.dataset, ss)

    archs = []
    val_scores = []
    top_archs = []
    top_val_scores = []
    top_test_scores = []

    # init training data for GBDT
    sampled_archs = sampler.sample(args.n)
    i = 0
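    # Train each sampled arch and record its validation score. If a model is
    # too large for the GPU (CUDA OOM), a replacement arch is sampled and
    # appended so the loop still collects roughly args.n evaluations.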
    while i < len(sampled_archs):
        arch = sampled_archs[i]
        data = sampler.load_data(arch)
        try:
            model = sampler.build_model(arch, data.x.shape[1], int(max(data.y)) + 1)
            trainer.init_trainer(model, arch[7], arch[6])
            val_score = trainer.train(data)
        except RuntimeError as e:
            if "cuda" in str(e) or "CUDA" in str(e):  # CUDA OOM, sample another arch
                print(e)
                sampled_archs += sampler.sample(1)
                i += 1
                continue
            else:
                raise e
        archs.append(arch)
        val_scores.append(val_score)
        print(arch, f'real val score: {val_score}')
        print(f'Number of evaluated archs: {len(archs)}')
        i += 1
    # train GBDT predictor
    for iter_round in range(1, args.iterations + 1):
        print(f'Iteration round {iter_round}, retraining model and sampling archs...', datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        # train GBDT
        X = [[str(e) for e in row] for row in archs]
        y = np.array(val_scores)
        train_pool = Pool(X, y, cat_features=list(range(len(X[0]))))
        # X = lgb.Dataset(pd.DataFrame(X, columns=ss.keys()), label=np.array(val_scores))
        # gbdt_model = lgb.train(gbdt_params, X, args.gbdt_num_boost_round, categorical_feature=ss.keys())
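        # The two commented lines above are an earlier LightGBM variant of the
        # predictor. CatBoost is used instead; every architecture field is a
        # string, so all columns are declared categorical in the Pool.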
        gbdt_model = CatBoostRegressor(
            learning_rate=args.gbdt_lr,
            verbose=False
        )
        gbdt_model.fit(train_pool)

        # pruning search space
        ss = pruning_search_space_by_shap(archs, gbdt_model, ss, args.p)
        sampler.update_search_space(ss)
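        # The args.p features with the lowest SHAP values are dropped from the
        # search space, so subsequent samples are drawn from a smaller,
        # GBDT-informed space.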
        # predict some archs
        sampled_archs = sampler.sample(args.m)
        X = [[str(e) for e in row] for row in sampled_archs]
        test_pool = Pool(X, cat_features=list(range(len(X[0]))))
        predicted_val_scores = gbdt_model.predict(test_pool)

        # sort the archs according to the predicted value
        zipped = zip(sampled_archs, predicted_val_scores)
        zipped = sorted(zipped, key=lambda e: e[1], reverse=True)  # sort in decreasing order
        sampled_archs, predicted_val_scores = zip(*zipped)
        sampled_archs, predicted_val_scores = list(sampled_archs), list(predicted_val_scores)
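        # Only the args.k best-predicted archs get a real training run below;
        # the remaining samples are filtered out by the GBDT surrogate.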
        print(f'Iteration round {iter_round}, evaluating top k archs on valid set', datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        # evaluate top k archs
        i = 0
        while i < len(sampled_archs):
            arch = sampled_archs[i]
            data = sampler.load_data(arch)
            try:
                model = sampler.build_model(arch, data.x.shape[1], int(max(data.y)) + 1)
                trainer.init_trainer(model, arch[7], arch[6])
                val_score = trainer.train(data)
                predicted_val_score = predicted_val_scores[i]
            except RuntimeError as e:
                if "cuda" in str(e) or "CUDA" in str(e):  # CUDA OOM, sample another arch
                    print(e)
                    new_archs = sampler.sample(1)
                    sampled_archs += new_archs
                    # keep predicted_val_scores aligned with sampled_archs
                    new_pool = Pool([[str(v) for v in row] for row in new_archs],
                                    cat_features=list(range(len(new_archs[0]))))
                    predicted_val_scores += list(gbdt_model.predict(new_pool))
                    i += 1
                    continue
                else:
                    raise e
            archs.append(arch)
            val_scores.append(val_score)
            print(arch, f'predicted val score: {predicted_val_score} | real val score: {val_score}')
            print(f'Number of evaluated archs: {len(archs)}')
            if i + 1 >= args.k:
                break
            i += 1

        # sort all the evaluated archs
        zipped = zip(archs, val_scores)
        zipped = sorted(zipped, key=lambda e: e[1], reverse=True)
        archs, val_scores = zip(*zipped)
        archs, val_scores = list(archs), list(val_scores)
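        # archs/val_scores now hold every arch evaluated so far, best-first,
        # so the test stage below covers the top archs from any round.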
        print(f'Iteration round {iter_round}, evaluating top k_test archs on test set', datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        # evaluate top k_test archs on test set
        i = 0
        while i < len(archs):
            arch = archs[i]
            data = sampler.load_data(arch)
            try:
                model = sampler.build_model(arch, data.x.shape[1], int(max(data.y)) + 1)
                trainer.init_trainer(model, arch[7], arch[6])
                val_score = trainer.train(data)
                test_score, z = trainer.test(data, return_logits=True)
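                # dump test logits and labels for offline analysis
                # (assumes an embeddings/ directory already exists)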
                pickle.dump((z, data.y[data.test_mask]), open(f'embeddings/{args.dataset}_AutoGRL-round{iter_round}-top{i + 1}.pt', 'wb'))
            except RuntimeError as e:
                if "cuda" in str(e) or "CUDA" in str(e):  # CUDA OOM, skip this arch
                    print(e)
                    i += 1
                    continue
                else:
                    raise e
            top_archs.append(arch)
            top_val_scores.append(val_score)
            top_test_scores.append(test_score)
            print(arch)
            print(f'Testing... round {iter_round} | arch top {i + 1} | real val score {val_score} | real test score {test_score}', datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
            if i + 1 >= args.k_test:  # only test top k_test models for every round
                break
            i += 1
        zipped = zip(top_val_scores, top_test_scores)
        zipped = sorted(zipped, key=lambda e: e[0], reverse=True)
        best_val_score, corr_test_score = zipped[0][0], zipped[0][1]
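        # report the test score of the arch with the best val score (the
        # honest selection metric) alongside the best test score seen so far
        # (an oracle upper bound)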
        # logging
        print(f'Iteration {iter_round} | best val score {best_val_score} | corresponding test score {corr_test_score} | best test score {max(top_test_scores)}', datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        pickle.dump((ss, sampler, trainer, archs, val_scores, gbdt_model, sampled_archs, predicted_val_scores, top_val_scores, top_test_scores), open(f'cache/gbdt/{args.dataset}_seed{args.seed}_round{iter_round}.pt', 'wb'))


if __name__ == '__main__':
    main(args)