# om-lex.py
import numpy as np
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------

# Index (1-5) of the predefined mean-reward setting to simulate.
SETTING = 1

# Set True to convert to the single-objective case: only the first objective
# is kept in `mean` (and D becomes 1), while `mean_set` still holds every
# objective of the chosen setting.
SINGLE_OBJECTIVE = False


def build_means(setting):
    """Return the mean-reward matrix of one of the predefined settings.

    The result has one row per arm and one column per objective.  Row 0 is
    the arm the rest of the script compares every other arm against.

    Settings 1-3 are three-arm, two-objective instances that differ only in
    the second objective of the last arm.  Settings 4 and 5 enumerate all
    triples over the levels (0.90, 0.50, 0.40, 0.10) that are
    lexicographically less than or equal to (0.50, 0.50, 0.50), in
    enumeration order; setting 5 additionally keeps only the arms whose
    second objective is at least 0.50.

    Raises:
        ValueError: if `setting` is not one of 1-5.
    """
    if setting == 1:
        return np.array([[0.50, 0.50], [0.50, 0.40], [0.40, 0.90]])
    if setting == 2:
        return np.array([[0.50, 0.50], [0.50, 0.40], [0.40, 0.50]])
    if setting == 3:
        return np.array([[0.50, 0.50], [0.50, 0.40], [0.40, 0.10]])
    if setting in (4, 5):
        levels = [0.90, 0.50, 0.40, 0.10]
        best = (0.50, 0.50, 0.50)
        # Tuple comparison is lexicographic, which is exactly the chained
        # "(i < .5) or (i == .5 and j < .5) or ..." test it replaces.
        arms = [
            [i, j, k]
            for i in levels
            for j in levels
            for k in levels
            if (i, j, k) <= best
        ]
        if setting == 5:
            arms = [arm for arm in arms if arm[1] >= 0.50]
        return np.array(arms)
    raise ValueError(f"unknown setting {setting!r}; expected an integer in 1-5")


mean = build_means(SETTING)

# mean_set is bound before the optional single-objective conversion below, so
# it always refers to the full multi-objective means.
mean_set = mean
A, D = mean.shape              # number of arms, number of objectives in `mean`
_, D_set = mean_set.shape      # number of objectives in `mean_set`

if SINGLE_OBJECTIVE:
    # converts to the single-objective case
    D = 1
    mean = mean[:, 0, None]

K = 100        # number of runs
T = 100000     # number of rounds per run

# reg[d, k, t] is the regret in objective d at round t of run k.
reg = np.zeros((D_set, K, T))

# set True for priority-free regrets (a gap is recorded for every objective
# whose mean differs from arm 0, not only for the first such objective)
reg_alt = False
def _record_regret(reg, mean_set, run, step, arm, reg_alt):
    """Write the regret of playing `arm` at round `step` of run `run`.

    Objectives are scanned in index order.  For every objective in which the
    mean of `arm` differs from the mean of arm 0, the gap
    ``mean_set[0, d] - mean_set[arm, d]`` is stored in ``reg[d, run, step]``.
    Unless `reg_alt` is true, the scan stops after the first such objective.
    """
    for d in range(mean_set.shape[1]):
        if mean_set[0, d] != mean_set[arm, d]:
            reg[d, run, step] = mean_set[0, d] - mean_set[arm, d]
            if not reg_alt:
                break


def _pull_arm(mean, U, N, arm):
    """Sample one 0/1 reward vector for `arm` and update its estimate in place.

    `U[arm]` is the running average of the rewards seen so far and `N[arm]`
    the number of plays; both are modified in place.
    """
    X = (np.random.uniform(size=(1, mean.shape[1])) <= mean[arm]).astype(float)
    U[arm] = (X + N[arm] * U[arm]) / (N[arm] + 1)
    N[arm] += 1


def simulate(mean, mean_set, reg, reg_alt=False):
    """Run the bandit simulation and fill `reg` with per-round regrets.

    Args:
        mean: (arms, objectives) array of success probabilities used to
            sample rewards and to select arms; row 0 is the reference arm.
        mean_set: (arms, objectives) array of means used to compute regret.
        reg: array of shape (objectives of `mean_set`, runs, rounds); it is
            written in place.  The number of runs and rounds is taken from
            its shape.
        reg_alt: if true, record the gap in every differing objective
            instead of only the first one (priority-free regrets).

    Returns:
        The same `reg` array, for convenience.

    Randomness comes from the global `np.random` state.
    """
    A, D = mean.shape
    _, K, T = reg.shape

    for k in range(K):
        print("k:", k)

        # initializes updates and estimates (each arm is played once)
        U = (np.random.uniform(size=(A, D)) <= mean).astype(float)
        N = np.ones((A, 1))

        # regret of the initialization: arm a is played at round a
        for a in range(A):
            _record_regret(reg, mean_set, k, a, a, reg_alt)

        t = A
        while t < T:
            # arm selection: an arm is a candidate when its estimate is
            # within the confidence radius of mean[0] in every objective
            diff = np.abs(U - mean[0])
            cond = diff < np.sqrt(4 * np.log(N) / N)
            candidates = np.flatnonzero(cond.all(1))

            if candidates.size:
                a = np.random.choice(candidates)
                _pull_arm(mean, U, N, a)
                _record_regret(reg, mean_set, k, t, a, reg_alt)
                t += 1
            else:
                # round-robin play: every arm once, one round per play
                for a in range(A):
                    # Stop as soon as the horizon is reached; without this
                    # check a round-robin started near the end of the run
                    # would index reg past its last round.
                    if t >= T:
                        break
                    _pull_arm(mean, U, N, a)
                    _record_regret(reg, mean_set, k, t, a, reg_alt)
                    t += 1

    return reg


def save_results(path, dims, mean, runs, reg):
    """Save ``[dims, mean, runs, reg]`` to `path` as a 4-element object array.

    The four items have different shapes, so they are stored in an explicit
    object array: recent NumPy versions refuse to build one implicitly from
    a ragged list.  Read the file back with
    ``np.load(path, allow_pickle=True)``.

    The parent directory of `path` is created if it does not exist.
    """
    import os  # local import: the file's import block does not provide it

    folder = os.path.dirname(path)
    if folder:
        os.makedirs(folder, exist_ok=True)

    bundle = np.empty(4, dtype=object)
    for slot, item in enumerate((dims, mean, runs, reg)):
        # Integer indexing stores each item as-is, whatever its shape.
        bundle[slot] = item
    np.save(path, bundle)


if __name__ == "__main__":
    simulate(mean, mean_set, reg, reg_alt)
    save_results("res/om-lex.npy", (A, D), mean, (K, T), reg)