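"""eval.py

Run the regret experiments (task, horizon, arms, or subset sweeps) in either the
stochastic or the adversarial setting, plot the results, and pickle the regret
dictionaries to the cache directory given by --cacheDir.

Illustrative invocations (the flag values below are examples only):

    python eval.py --exp task --expArgs "[10, 110, 10]"
    python eval.py --exp horizon --expArgs "[50, 310, 50]" --stochastic
    python eval.py --exp task --loadCache
"""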
import argparse
import json
import os
import pickle
import time
import matplotlib.pyplot as plt
import numpy as np
import utils
font = {"weight": "bold", "size": 22}
plt.rc("font", **font)
plt.rcParams["pdf.fonttype"] = 42
plt.rcParams["ps.fonttype"] = 42
plt.rcParams["figure.figsize"] = [9, 5.5] # NIPS format: [9, 5.5]
plt.rcParams["figure.dpi"] = 300


def task_exp(args, extra_args):
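    """Sweep the number of tasks and plot the average regret per task."""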
N_EXPS = args.nExps
N_TASKS = args.nTasks
N_ARMS = args.nArms
HORIZON = args.horizon
OPT_SIZE = args.optSize
if extra_args["is_adversarial"]:
setting = "Adversarial"
else:
setting = "Stochastic"
if not args.loadCache:
task_list = np.arange(extra_args["exp_args"][0], extra_args["exp_args"][1], extra_args["exp_args"][2])
(X, regret_dict, title, xlabel, ylabel) = utils.task_exp(
N_EXPS, N_ARMS, OPT_SIZE, HORIZON, task_list, **extra_args
)
else:
X = np.arange(N_TASKS)
regret_dict = pickle.load(open(os.path.join(args.cacheDir, "cache_tasks.p"), "rb"))
title = f"{setting}:{N_ARMS} arms, horizon = {HORIZON}, and subset size = {OPT_SIZE}"
xlabel, ylabel = "Number of tasks", "Average Regret per task"
utils.plot(X, regret_dict, title, xlabel, ylabel, **extra_args)
plt.savefig(os.path.join(args.cacheDir, "task_exp.png"))
pickle.dump(
regret_dict,
open(
os.path.join(args.cacheDir, setting + "_tasks_" + str(time.time()) + ".p"),
"wb",
),
)


def horizon_exp(args, extra_args):
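    """Sweep the horizon and plot the average regret per step."""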
N_EXPS = args.nExps
N_TASKS = args.nTasks
N_ARMS = args.nArms
OPT_SIZE = args.optSize
if extra_args["is_adversarial"]:
setting = "Adversarial"
else:
setting = "Stochastic"
if not args.loadCache:
horizon_list = np.arange(extra_args["exp_args"][0], extra_args["exp_args"][1], extra_args["exp_args"][2])
(X_h, regret_dict_h, title, xlabel, ylabel) = utils.horizon_exp(
N_EXPS, N_TASKS, N_ARMS, OPT_SIZE, horizon_list=horizon_list, **extra_args
)
else:
X_h = np.arange(50, 310, 50)
regret_dict_h = pickle.load(open(os.path.join(args.cacheDir, "cache_horizon.p"), "rb"))
title = f"{setting}: {N_ARMS} arms, {N_TASKS} tasks, and subset size = {OPT_SIZE}"
xlabel, ylabel = "Horizon", "Average Regret per Step"
utils.plot(X_h, regret_dict_h, title, xlabel, ylabel, **extra_args)
pickle.dump(
regret_dict_h,
open(
os.path.join(args.cacheDir, setting + "_horizon_" + str(time.time()) + ".p"),
"wb",
),
)


def subset_exp(args, extra_args):
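    """Sweep the size of the optimal subset and plot the resulting regret."""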
N_EXPS = args.nExps
N_TASKS = args.nTasks
N_ARMS = args.nArms
HORIZON = args.horizon
if extra_args["is_adversarial"]:
setting = "Adversarial"
else:
setting = "Stochastic"
if extra_args["exp_args"] is None:
X_e = np.arange(2, N_ARMS + 1, 1)
else:
X_e = np.arange(extra_args["exp_args"][0], extra_args["exp_args"][1], extra_args["exp_args"][2])
if not args.loadCache:
(X_e, regret_dict_e, title, xlabel, ylabel) = utils.subset_exp(
N_EXPS, N_TASKS, N_ARMS, HORIZON, opt_size_list=X_e, **extra_args
)
else:
title = f"{setting}: {N_ARMS} arms, horizon = {HORIZON}, {N_TASKS} tasks"
xlabel, ylabel = "subset size", "Regret"
regret_dict_e = pickle.load(open(os.path.join(args.cacheDir, "cache_subset.p"), "rb"))
utils.plot(X_e, regret_dict_e, title, xlabel, ylabel, **extra_args)
pickle.dump(
regret_dict_e,
open(
os.path.join(args.cacheDir, setting + "_subset_" + str(time.time()) + ".p"),
"wb",
),
)


def arms_exp(args, extra_args):
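    """Sweep the number of arms and plot the resulting regret."""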
N_EXPS = args.nExps
N_TASKS = args.nTasks
HORIZON = args.horizon
OPT_SIZE = args.optSize
if extra_args["is_adversarial"]:
setting = "Adversarial"
else:
setting = "Stochastic"
if not args.loadCache:
n_arms_list = np.arange(extra_args["exp_args"][0], extra_args["exp_args"][1], extra_args["exp_args"][2])
(X_b, regret_dict_b, title, xlabel, ylabel) = utils.arms_exp(
N_EXPS, N_TASKS, OPT_SIZE, HORIZON, n_arms_list, **extra_args
)
else:
title = f"{setting}: Horizon = {HORIZON}, {N_TASKS} tasks, and subset size = {OPT_SIZE}"
xlabel, ylabel = "Number of Arms", "Regret"
X_b = np.arange(3, 8, 1)
regret_dict_b = pickle.load(open(os.path.join(args.cacheDir, "cache_arms.p"), "rb"))
utils.plot(X_b, regret_dict_b, title, xlabel, ylabel, **extra_args)
pickle.dump(
regret_dict_b,
open(
os.path.join(args.cacheDir, setting + "_arms_" + str(time.time()) + ".p"),
"wb",
),
)


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--exp", help="choose experiment (task, horizon, arms, and subset)", type=str, default="task")
parser.add_argument("--loadCache", dest="loadCache", action="store_true")
parser.add_argument("--notLoadCache", dest="loadCache", action="store_false")
parser.set_defaults(loadCache=False)
parser.add_argument("--adversarial", dest="isAdversarial", action="store_true")
parser.add_argument("--stochastic", dest="isAdversarial", action="store_false")
parser.set_defaults(isAdversarial=True)
parser.add_argument("--nonOblivious", dest="isNonOblivious", action="store_true")
parser.set_defaults(isNonOblivious=False)
parser.add_argument("--quiet", dest="quiet", action="store_true")
parser.add_argument("--notQuiet", dest="quiet", action="store_false")
parser.set_defaults(quiet=True)
parser.add_argument("--nTasks", help="number of tasks", type=int, default=500)
parser.add_argument("--nArms", help="number of arms", type=int, default=80)
parser.add_argument("--nExps", help="number of repeated experiments", type=int, default=5)
parser.add_argument("--optSize", help="size of the optimal subset (must >1)", type=int, default=8)
parser.add_argument("--horizon", help="horizon of each task", type=int, default=4000)
    parser.add_argument(
        "--timeOut",
        help="maximum minutes per experiment across all settings (the total budget is divided by repeat_exps * num_tested_method)",
        type=float,
        default=2,
    )
    parser.add_argument(
        "--expArgs", help='range of the swept quantity as a JSON list, e.g. "[a, b, c]" => range(a, b, c)', type=str, default=None
    )
parser.add_argument("--cacheDir", help="directory of cache results", type=str, default="./results")
parser.add_argument("--seed", help="seed number", type=int, default=None)
args = parser.parse_args()
if args.seed is not None:
np.random.seed(args.seed)
exp_args = None
if args.loadCache is True:
if (
args.nTasks != 500
or args.nArms != 80
or args.nExps != 5
or args.optSize != 8
or args.horizon != 4000
or args.expArgs is not None
):
            raise ValueError(
                "When using loadCache, please use the default settings for nTasks, nArms, nExps, optSize, and horizon."
            )
exp_args = None
else:
        exp_args = json.loads(args.expArgs) if args.expArgs is not None else None
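    # GAP_THRESHOLD ~ sqrt(nArms * log(nTasks) / horizon); used to set gap_constrain below.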
GAP_THRESHOLD = np.sqrt(args.nArms * np.log(args.nTasks) / args.horizon)
extra_args = {
"exp_args": exp_args,
"gap_constrain": min(1, GAP_THRESHOLD * 1.4), # 1.0005 is small gap, 1.2 for large
"plot_var": True,
"is_adversarial": args.isAdversarial,
"timeout": args.timeOut, # maximum duration for each roll-outs. Unit = minute. -1 = unlimited
"quiet": args.quiet,
"skip_list": [
"E_BASS",
"G_BASS_FC",
"EE",
],
"linewidth": 4,
"plot_legend": True,
"OG_scale": 1, # 0.008,
"is_non_oblivious": args.isNonOblivious,
}
tik = time.time()
if args.exp == "task":
task_exp(args, extra_args)
elif args.exp == "horizon":
horizon_exp(args, extra_args)
elif args.exp == "arms":
arms_exp(args, extra_args)
elif args.exp == "subset":
subset_exp(args, extra_args)
tok = time.time()
print(f"Total time spent: {(tok-tik)/3600} hours.")