Figure3.py
import gpflow
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from dppy.finite_dpps import FiniteDPP
#from determinental_sample_GP import det_sample_GP as sample
from KL_Bounds import KL_bound, KL_bound2
matplotlib.rcParams.update({'font.size': 30})
jitter = 1e-11
low_jitter = gpflow.settings.get_settings()
low_jitter.numerics.jitter_level = jitter
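# the low-jitter settings are applied below via the gpflow.settings.temp_settings
# context manager (GPflow 1.x API), scoping the reduced jitter to the sparse-model fits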
np.random.seed(1)
lengthscale = .6
# the noise variance is high for illustrative purposes: with low variance we can't get
# enough numerical precision for L_upper - L_lower to go to zero and still compute the
# Cholesky factorization. In general this bound is very sensitive to jitter; it should be
# monotonically decreasing in M, but isn't if we add much jitter.
sn = 1.
k = gpflow.kernels.RBF(1)
k.lengthscales = lengthscale
N = 1000
M = 100
X = np.random.randn(N, 1)
Kff = k.compute_K_symm(X)
Y = np.random.multivariate_normal(mean=np.zeros(N), cov=Kff + np.square(sn) * np.eye(N))[:, None]
y_norm_square = np.sum(np.square(Y))
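# Y is a draw from the model's own marginal, y ~ N(0, Kff + sn^2 * I), so the generating
# process matches the GPR model below; ||y||^2 feeds the data-dependent Theorem 3 bound
# through its bound_y argument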
# compute the full marginal likelihood
full = gpflow.models.GPR(X, Y, k)
full.likelihood.variance = np.square(sn)
full.kern.lengthscales = lengthscale
ml = full.compute_log_likelihood()
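# since the ELBO lower-bounds the exact log marginal likelihood, ml - elbo computed
# inside the loop below equals the KL divergence between the variational and exact posteriors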
# alternative: draw a single sample of M inducing points up front and reuse it to ensure monotonicity
# Zs, ind = sample(X, k, M)
gaps = list()
avg_50 = list()
avg_99 = list()
bound_50 = list()
bound_99 = list()
titsias = list()
ms = np.arange(8, 50, 2)
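# sweep the number of inducing points M from 8 to 48 in steps of 2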
for m in ms:
    # use low jitter; otherwise the asymptotic behavior is hard to see
    with gpflow.settings.temp_settings(low_jitter):
        # fit sparse model with inducing points drawn from a k-DPP over the data
        DPP = FiniteDPP('likelihood', **{'L': Kff + np.eye(N) * 1e-6})
        DPP.flush_samples()
        DPP.sample_exact_k_dpp(size=m)
        ind = DPP.list_of_samples[0]
        Det_Init_M = gpflow.models.SGPR(X, Y, kern=k, Z=X[ind, :])
        Det_Init_M.likelihood.variance = np.square(sn)
        Det_Init_M.kern.lengthscales = lengthscale
        # compute the ELBO
        elbo = Det_Init_M.compute_log_likelihood()
        # true KL divergence between the variational and exact posteriors
        gaps.append(ml - elbo)
        # bounds from Theorem 3
        bound_50.append(
            KL_bound(k_var=1, k_ls=lengthscale, sigma_n=sn, N=1000, p_sd=1, p_success=.5, bound_y=y_norm_square, M=m))
        bound_99.append(
            KL_bound(k_var=1, k_ls=lengthscale, sigma_n=sn, N=1000, p_sd=1, p_success=.99, bound_y=y_norm_square, M=m))
        titsias.append(Det_Init_M.compute_upper_bound() - elbo)
        # bounds from Theorem 4
        avg_50.append(KL_bound2(k_var=1, k_ls=lengthscale, sigma_n=sn, N=1000, p_sd=1, p_success=.5, M=m))
        avg_99.append(KL_bound2(k_var=1, k_ls=lengthscale, sigma_n=sn, N=1000, p_sd=1, p_success=.99, M=m))
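# note: KL_bound (Theorem 3) depends on the data through bound_y = ||y||^2, while
# KL_bound2 (Theorem 4) takes no data term; the avg_ names suggest it bounds the
# KL divergence averaged over datasets drawn from the prior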
# plotting
plt.rc('font', size=18)
plt.rc('text', usetex=True)
plt.figure(figsize=(12,5))
plt.plot(ms, gaps)
plt.plot(ms, titsias)
plt.plot(ms, avg_50, ls=":")
plt.plot(ms, avg_99, ls=":")
plt.plot(ms, bound_50, ls="--")
plt.plot(ms, bound_99, ls="--")
plt.ylim([0., 10.])
plt.xlim([8, 50])
plt.legend(["Actual KL Divergence", "$\mathcal{L}_{upper}-\mathcal{L}_{Lower}$", "Theorem 4, p=.5", "Theorem 4, p=.99",
"Theorem 3, p=.5", "Theorem 3, p=.99"])
plt.ylabel("KL Divergence")
plt.xlabel("Number of Inducing points")
plt.tight_layout()
plt.savefig("Figure3.pdf")