Skip to content

Commit

Permalink
Merge pull request #16 from fdtomasi/develop
Browse files Browse the repository at this point in the history
Renaming of GraphLasso classes
  • Loading branch information
fdtomasi authored Feb 26, 2019
2 parents 9da7c28 + a827b18 commit 8f20d87
Show file tree
Hide file tree
Showing 25 changed files with 639 additions and 1,551 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,5 @@ ENV/

*~
.magicindex.json

\.vscode/
2 changes: 1 addition & 1 deletion regain/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.1.7'
__version__ = '0.2.0'
102 changes: 57 additions & 45 deletions regain/bayesian/gwishart_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,19 @@
from sklearn.utils import Bunch, check_array
from sklearn.utils.extmath import fast_logdet

from regain.covariance.graph_lasso_ import GraphLasso, graph_lasso
from regain.covariance.graphical_lasso_ import GraphicalLasso, graphical_lasso


def mk_all_ugs(n_dim):
"""Utility for generating all possible """
nedges = int(comb(n_dim, 2))
m = 2 ** nedges
m = 2**nedges

ind = np.array([list(binary_repr(x, width=len(binary_repr(m-1))))
for x in range(m)]).astype(int)
ind = np.array(
[
list(binary_repr(x, width=len(binary_repr(m - 1))))
for x in range(m)
]).astype(int)
ord = np.argsort(ind.sum(axis=1))
ind = ind[ord]

Expand All @@ -42,8 +45,9 @@ def mk_all_ugs(n_dim):

def markov_blankets(graphs, boolean=True, tol=1e-8, unique=False):
"""For each variable, list each of its markov blankets in graphs."""
m_blankets = [np.array([G[i] for G in graphs]) for i in
range(graphs[0].shape[0])]
m_blankets = [
np.array([G[i] for G in graphs]) for i in range(graphs[0].shape[0])
]
for i, mb in enumerate(m_blankets):
mb[:, i] = 0
mb[np.abs(mb) < tol] = 0
Expand Down Expand Up @@ -71,25 +75,30 @@ def score_blankets(blankets, X, alphas=(0.01, 0.5, 1)):
X_mb = np.zeros((X.shape[0], 1))

y_mb = X[:, i]
score = np.sum([LassoLars(alpha=alpha).fit(
X_mb, y_mb).score(X_mb, y_mb) for alpha in alphas])
score = np.sum(
[
LassoLars(alpha=alpha).fit(X_mb, y_mb).score(X_mb, y_mb)
for alpha in alphas
])

scores.append(score)
scores_all.append(scores)

ss = np.exp(np.array(scores_all))
normalized_scores = ss / np.repeat(ss.sum(axis=1)[:, None],
ss.shape[1], axis=1)
normalized_scores = ss / np.repeat(
ss.sum(axis=1)[:, None], ss.shape[1], axis=1)
return normalized_scores


def get_graphs(blankets, scores, n_dim, n_resampling=200):
idx = np.array([np.random.choice(
scores.shape[1], p=scores[i], size=n_resampling)
for i in range(n_dim)])

graphs_ = np.array([blankets[i][idx[i]] for i in
range(n_dim)]).transpose(1, 0, 2)
idx = np.array(
[
np.random.choice(scores.shape[1], p=scores[i], size=n_resampling)
for i in range(n_dim)
])

graphs_ = np.array([blankets[i][idx[i]] for i in range(n_dim)]).transpose(
1, 0, 2)
# symmetrise with AND operator -> product
graphs = np.array([g * g.T for g in graphs_])
return graphs
Expand All @@ -102,12 +111,12 @@ def covsel(x, p, nonZero, C):
Based on sparse GGM estimation code by Mark Schmidt.
"""
X = np.zeros((p, p))
X[nonZero] = x # fill the diagonal and upper triangle
X[nonZero] = x # fill the diagonal and upper triangle
X += np.triu(X, 1).T # fill the lower triangle

# Fast Way to compute -logdet(X) + tr(X*C)
# f = -2*sum(log(diag(R))) + sum(sum(C.*X)) + (lambda/2)*sum(X(:).^2);
f = - fast_logdet(X) + np.sum(C * X)
f = -fast_logdet(X) + np.sum(C * X)

if f < np.inf:
g = C - linalg.pinvh(X)
Expand Down Expand Up @@ -159,7 +168,7 @@ def GWishartFit(X, G, GWprior, mode='covsel'):
# convert G to alpha
alpha = np.zeros_like(G, dtype=float)
alpha[~(G + G.T)] = np.inf
precision = graph_lasso(emp_cov=C, alpha=alpha)[0]
precision = graphical_lasso(emp_cov=C, alpha=alpha)[0]

return precision, S

Expand Down Expand Up @@ -239,11 +248,11 @@ def compute_score(X, G, P, S, GWprior=None, score_method='bic'):

tmp2 = A[0, :].dot(B[:, 0]) + A[1, :].dot(B[:, 1])

diagH[e1] = -(dn-2) * tmp2 / 2
diagH[e1] = -(dn - 2) * tmp2 / 2
# diagH(e1) = -(dn-2) * trace(M1(nz2,nz1)*M2(nz1,nz2))/2;

logdetHdiag = sum(np.log(-diagH))
lognormconst = dof * np.log(2*np.pi)/2 + logh - logdetHdiag / 2.
lognormconst = dof * np.log(2 * np.pi) / 2 + logh - logdetHdiag / 2.
score = lognormconst - GWprior.lognormconst - \
n_samples * n_dim * np.log(2 * np.pi) / 2
GWpost.lognormconst = lognormconst
Expand All @@ -266,11 +275,11 @@ def compute_score(X, G, P, S, GWprior=None, score_method='bic'):
# tmp2 = A[0, :].dot(B[:, 0]) + A[1, :].dot(B[:, 1])
# tmp2 = np.trace(A.dot(B))
tmp2 = (A * B.T).sum()
H[e2, e1] = H[e1, e2] = -(dn-2) * tmp2 / 2.
H[e2, e1] = H[e1, e2] = -(dn - 2) * tmp2 / 2.

# neg Hessian will be posdef
logdetH = 2*sum(np.log(np.diag(linalg.cholesky(-H))))
lognormconst = dof * np.log(2*np.pi)/2 + logh - logdetH / 2.
logdetH = 2 * sum(np.log(np.diag(linalg.cholesky(-H))))
lognormconst = dof * np.log(2 * np.pi) / 2 + logh - logdetH / 2.
score = lognormconst - GWprior.lognormconst - \
n_samples * n_dim * np.log(2 * np.pi) / 2
GWpost.lognormconst = lognormconst
Expand All @@ -283,7 +292,7 @@ def GWishartScore(X, G, d0=3, S0=None, score_method='bic', mode='covsel'):
# %Initialize GW prior structure
n_samples, n_dim = X.shape
if S0 is None:
S0 = (d0 - 2) * np.diag(np.diag(np.cov(X,bias=True)))
S0 = (d0 - 2) * np.diag(np.diag(np.cov(X, bias=True)))

GWprior = Bunch(d0=d0, S0=S0, lognormconst=0, lognormconstDiag=0)

Expand All @@ -292,8 +301,9 @@ def GWishartScore(X, G, d0=3, S0=None, score_method='bic', mode='covsel'):
noData = np.zeros((0, n_dim))

P0, S_noData = GWishartFit(noData, G, GWprior)
GWtemp = compute_score(noData, G, P0, S_noData, GWprior=GWprior,
score_method=score_method)
GWtemp = compute_score(
noData, G, P0, S_noData, GWprior=GWprior,
score_method=score_method)
GWprior.lognormconst = GWtemp.lognormconst

# Compute the map precision matrix P
Expand All @@ -304,7 +314,7 @@ def GWishartScore(X, G, d0=3, S0=None, score_method='bic', mode='covsel'):
return GWpost


def bayesian_graph_lasso(
def bayesian_graphical_lasso(
X, tol=1e-8, alphas=None, n_resampling=200, mode='gl', top_n=1,
scoring='diaglaplace', assume_centered=False, max_iter=100):
"""Bayesian graphical lasso."""
Expand All @@ -314,17 +324,16 @@ def bayesian_graph_lasso(
alphas = np.logspace(-2, 0, 20)

    # get a series of Markov blankets for various alphas
mdl = GraphLasso(assume_centered=assume_centered, tol=tol,
max_iter=max_iter, verbose=False)
precisions = [
mdl.set_params(alpha=a).fit(X).precision_
for a in alphas]
mdl = GraphicalLasso(
assume_centered=assume_centered, tol=tol, max_iter=max_iter,
verbose=False)
precisions = [mdl.set_params(alpha=a).fit(X).precision_ for a in alphas]
mblankets = markov_blankets(precisions, tol=tol, unique=True)

normalized_scores = score_blankets(mblankets, X=X, alphas=[0.01, 0.5, 1])

graphs = get_graphs(mblankets, normalized_scores, n_dim=n_dim,
n_resampling=n_resampling)
graphs = get_graphs(
mblankets, normalized_scores, n_dim=n_dim, n_resampling=n_resampling)

nonzeros_all = [np.triu(g, 1) + np.eye(n_dim, dtype=bool) for g in graphs]

Expand All @@ -335,14 +344,16 @@ def bayesian_graph_lasso(
# Find non-zero elements of upper triangle of G
# make sure diagonal is non-zero
# G = nonzeros_all[1] # probably can discard if all zeros?
res = [GWishartScore(X, G, d0=d0, S0=S0, mode=mode, score_method=scoring)
for G in nonzeros_all]
res = [
GWishartScore(X, G, d0=d0, S0=S0, mode=mode, score_method=scoring)
for G in nonzeros_all
]

top_n = [x.P for x in sorted(res, key=lambda x: x.score)[::-1][:top_n]]
return np.mean(top_n, axis=0)


class BayesianGraphLasso(GraphLasso):
class BayesianGraphicalLasso(GraphicalLasso):
"""Sparse inverse covariance estimation with an l1-penalized estimator.
Parameters
Expand Down Expand Up @@ -400,10 +411,11 @@ class BayesianGraphLasso(GraphLasso):
"""

def __init__(self, alpha=0.01, max_iter=100, alphas=None, n_resampling=200,
tol=1e-4, verbose=False, assume_centered=False, mode='gl',
scoring='diaglaplace', top_n=1):
super(GraphLasso, self).__init__(
def __init__(
self, alpha=0.01, max_iter=100, alphas=None, n_resampling=200,
tol=1e-4, verbose=False, assume_centered=False, mode='gl',
scoring='diaglaplace', top_n=1):
super(GraphicalLasso, self).__init__(
alpha=alpha, tol=tol, max_iter=max_iter, verbose=verbose,
assume_centered=assume_centered, mode=mode)
self.alphas = alphas
Expand All @@ -422,13 +434,13 @@ def fit(self, X, y=None):
"""
# Covariance does not make sense for a single feature
X = check_array(X, ensure_min_features=2, ensure_min_samples=2,
estimator=self)
X = check_array(
X, ensure_min_features=2, ensure_min_samples=2, estimator=self)

if self.alphas is None:
self.alphas = np.logspace(-2, 0, 20)

self.precision_ = bayesian_graph_lasso(
self.precision_ = bayesian_graphical_lasso(
X, tol=self.tol, alphas=self.alphas,
n_resampling=self.n_resampling, mode=self.mode,
scoring=self.scoring, assume_centered=self.assume_centered,
Expand Down
12 changes: 6 additions & 6 deletions regain/bayesian/wishart_process_.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@

from regain.bayesian import stats
from regain.bayesian.gaussian_process_ import sample as sample_gp
from regain.bayesian.sampling import elliptical_slice, sample_hyper_kernel, sample_ell, GWP_construct
from regain.covariance.time_graph_lasso_ import TimeGraphLasso
from regain.bayesian.sampling import (GWP_construct, elliptical_slice,
sample_ell, sample_hyper_kernel)
from regain.covariance.time_graphical_lasso_ import TimeGraphicalLasso


def fit(
Expand Down Expand Up @@ -151,12 +152,11 @@ def kernel(X, Y=None, var=None, inverse_width=None, normalised=False):
return k


def periodic_kernel(X, Y=None, inverse_width=None):
k = kernels.ExpSineSquared(X, Y=Y, length_scale=inverse_width)
return k
def periodic_kernel(X, Y=None, inverse_width=1):
    """Evaluate a periodic (exp-sine-squared) kernel on X (and optionally Y).

    Builds an ExpSineSquared kernel with ``length_scale=inverse_width`` and
    returns the kernel matrix k(X, Y); when Y is None the kernel is evaluated
    as k(X, X).

    NOTE(review): ``kernels`` is presumably sklearn.gaussian_process.kernels,
    imported elsewhere in this file — confirm. Despite its name,
    ``inverse_width`` is passed directly as the length scale.
    """
    return kernels.ExpSineSquared(length_scale=inverse_width)(X, Y=Y)


class WishartProcess(TimeGraphLasso):
class WishartProcess(TimeGraphicalLasso):
def __init__(
self, theta=100, var_prop=1, mu_prior=1, var_prior=10,
var_Lprop=10, mu_Lprior=1, var_Lprior=1, n_iter=500, burn_in=None,
Expand Down
8 changes: 4 additions & 4 deletions regain/covariance/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .graph_lasso_ import GraphLasso
from .latent_graph_lasso_ import LatentGraphLasso
from .time_graph_lasso_ import TimeGraphLasso
from .latent_time_graph_lasso_ import LatentTimeGraphLasso
from .graphical_lasso_ import GraphicalLasso
from .latent_graphical_lasso_ import LatentGraphicalLasso
from .time_graphical_lasso_ import TimeGraphicalLasso
from .latent_time_graphical_lasso_ import LatentTimeGraphicalLasso
Loading

0 comments on commit 8f20d87

Please sign in to comment.