Skip to content

Commit

Permalink
Merge pull request #23 from fdtomasi/develop
Browse files Browse the repository at this point in the history
Datasets fix and tests addition
  • Loading branch information
fdtomasi authored Mar 7, 2020
2 parents 75a918d + 2de985d commit 3f99857
Show file tree
Hide file tree
Showing 8 changed files with 181 additions and 117 deletions.
2 changes: 1 addition & 1 deletion regain/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

__version__ = '0.2.2'
__version__ = '0.2.3'
25 changes: 11 additions & 14 deletions regain/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,10 @@ def _gaussian_case(
normalize_starting_matrices=False, degree=2, epsilon=1e-2,
keep_sparsity=False, proportional=False, **kwargs):
modes = dict(
my=data_Meinshausen_Yuan,
mys=data_Meinshausen_Yuan_sparse_latent,
sin=make_sin,
fixed_sparsity=make_fixed_sparsity,
sincos=make_sin_cos,
gp=make_exp_sine_squared,
fede=make_fede,
sklearn=make_sparse_low_rank,
ma=make_ma_xue_zou,
mak=make_ma_xue_zou_rand_k,
ticc=make_ticc)
my=data_Meinshausen_Yuan, mys=data_Meinshausen_Yuan_sparse_latent,
sin=make_sin, fixed_sparsity=make_fixed_sparsity, sincos=make_sin_cos,
gp=make_exp_sine_squared, fede=make_fede, sklearn=make_sparse_low_rank,
ma=make_ma_xue_zou, mak=make_ma_xue_zou_rand_k, ticc=make_ticc)

if mode is not None:
# mode overrides other parameters, for back compatibility
Expand All @@ -84,7 +77,7 @@ def _gaussian_case(
# normalize_starting_matrices=normalize_starting_matrices,
# degree=degree, epsilon=epsilon, keep_sparsity=keep_sparsity,
# proportional=proportional)
thetas, thetas_obs, ells = func(n_dim_obs, n_dim_lat, T, **kwargs)
thetas, thetas_obs, ells = func(n_dim_obs=n_dim_obs, n_dim_lat=n_dim_lat, T=T, **kwargs)
sigmas = list(map(np.linalg.inv, thetas_obs))
# map(normalize_matrix, sigmas) # in place

Expand Down Expand Up @@ -114,9 +107,11 @@ def _ising_case(
for t in thetas
]
data = np.array(samples)
X = np.vstack(data)
y = np.repeat(range(len(thetas)), n_samples).astype(int)
if time_on_axis == "last":
data = data.transpose(1, 2, 0)
return data, thetas
return Bunch(data=data, thetas=np.array(thetas), X=X, y=y)


def _poisson_case(
Expand All @@ -129,9 +124,11 @@ def _poisson_case(
for t in thetas
]
data = np.array(samples)
X = np.vstack(data)
y = np.repeat(range(len(thetas)), n_samples).astype(int)
if time_on_axis == "last":
data = data.transpose(1, 2, 0)
return data, thetas
return Bunch(data=data, thetas=np.array(thetas), X=X, y=y)


def make_dataset(
Expand Down
132 changes: 33 additions & 99 deletions regain/datasets/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,7 @@ def _permute_locations(locations, random_state):
return new_locations


def data_Meinshausen_Yuan(
p=198, h=2, n=200, T=10, random_state=None, **kwargs):
random_state = check_random_state(random_state)
nodes_locations = []
for i in range(p):
nodes_locations.append(list(random_state.uniform(size=2)))
probs = _compute_probabilities(nodes_locations, random_state)
def _theta_my(probs, p, random_state, value=0.245):
theta = np.zeros((p, p))
for i in range(p):
ix = np.argsort(probs[i, :])[::-1]
Expand All @@ -86,18 +80,26 @@ def data_Meinshausen_Yuan(
ixs.append(ix[j])
sel += 1
j += 1
theta[i, ixs] = 0.245
theta[ixs, i] = 0.245
theta[i, ixs] = theta[ixs, i] = value
to_remove = np.where(np.sum((theta != 0).astype(int), axis=1) > 4)[0]
for t in to_remove:
edges = np.nonzero(theta[t, :])[0]
random_state.shuffle(edges)
removable = edges[:-4]
theta[t, removable] = 0
theta[removable, t] = 0
np.fill_diagonal(theta, 1)
theta[t, removable] = theta[removable, t] = 0

np.fill_diagonal(theta, 1)
assert is_pos_def(theta)
return theta


def data_Meinshausen_Yuan(
p=198, h=2, n=200, T=10, random_state=None, **kwargs):
random_state = check_random_state(random_state)
nodes_locations = [list(random_state.uniform(size=2)) for i in range(p)]
probs = _compute_probabilities(nodes_locations, random_state)
theta = _theta_my(probs, p, random_state=random_state)

latent = np.zeros((h, p))
for i in range(h):
for j in range(p):
Expand All @@ -124,32 +126,7 @@ def data_Meinshausen_Yuan(
locations.append(locs)
probs = _compute_probabilities(locs, random_state, theta_prev != 0)

theta = np.zeros((p, p))
for i in range(p):
ix = np.argsort(probs[i, :])[::-1]
no_nonzero = np.sum((theta != 0).astype(int), axis=1)[i]
ixs = []
sel = 1
j = 0
while sel <= 4 - no_nonzero and j < p:
if ix[j] < i:
j += 1
continue
ixs.append(ix[j])
sel += 1
j += 1
theta[i, ixs] = 0.245
theta[ixs, i] = 0.245
to_remove = np.where(np.sum((theta != 0).astype(int), axis=1) > 4)[0]
for t in to_remove:
edges = np.nonzero(theta[t, :])[0]
random_state.shuffle(edges)
removable = edges[:-4]
theta[t, removable] = 0
theta[removable, t] = 0
np.fill_diagonal(theta, 1)

assert is_pos_def(theta)
theta = _theta_my(probs, p, random_state=random_state)
thetas.append(theta)
theta_observed = theta - L
assert is_pos_def(theta_observed)
Expand All @@ -164,37 +141,13 @@ def data_Meinshausen_Yuan(


def data_Meinshausen_Yuan_sparse_latent(
p=198, h=2, n=200, T=10, random_state=None):
p=198, h=2, n=200, T=10, random_state=None, **kwargs):
random_state = check_random_state(random_state)
nodes_locations = [
list(random_state.uniform(size=2)) for i in range(p + h)
]
probs = _compute_probabilities(nodes_locations, random_state)
theta = np.zeros((p, p))
for i in range(p):
ix = np.argsort(probs[i, :-h])[::-1]
no_nonzero = np.sum((theta != 0).astype(int), axis=1)[i]
ixs = []
sel = 1
j = 0
while sel <= 4 - no_nonzero and j < p:
if ix[j] < i:
j += 1
continue
ixs.append(ix[j])
sel += 1
j += 1
theta[ixs, i] = theta[i, ixs] = 0.245
to_remove = np.where(np.sum((theta != 0).astype(int), axis=1) > 4)[0]
for t in to_remove:
edges = np.nonzero(theta[t, :])[0]
random_state.shuffle(edges)
removable = edges[:-4]
theta[t, removable] = 0
theta[removable, t] = 0
np.fill_diagonal(theta, 1)

assert is_pos_def(theta)
theta = _theta_my(probs[:, :-h], p, random_state=random_state)

latent = np.zeros((h, p + h))
to_put = (p + h) * 0.5
Expand Down Expand Up @@ -258,31 +211,7 @@ def data_Meinshausen_Yuan_sparse_latent(
probs_lat = _compute_probabilities(
locs[-h:], random_state, lat_prev != 0)

theta = np.zeros((p, p))
for i in range(p):
ix = np.argsort(probs_theta[i, :])[::-1]
no_nonzero = np.sum((theta != 0).astype(int), axis=1)[i]
ixs = []
sel = 1
j = 0
while sel <= 4 - no_nonzero and j < p:
if ix[j] < i:
j += 1
continue
ixs.append(ix[j])
sel += 1
j += 1
theta[i, ixs] = 0.245
theta[ixs, i] = 0.245
to_remove = np.where(np.sum((theta != 0).astype(int), axis=1) > 4)[0]
for t in to_remove:
edges = np.nonzero(theta[t, :])[0]
random_state.shuffle(edges)
removable = edges[:-4]
theta[t, removable] = 0
theta[removable, t] = 0
np.fill_diagonal(theta, 1)
assert is_pos_def(theta)
theta = _theta_my(probs_theta, p, random_state=random_state)
thetas_O.append(theta)

lat = np.zeros((h, h))
Expand Down Expand Up @@ -332,7 +261,7 @@ def data_Meinshausen_Yuan_sparse_latent(


def make_ell(n_dim_obs=100, n_dim_lat=10):
"""Doc."""
"""Make ell matrix."""
K_HO = np.zeros((n_dim_lat, n_dim_obs))
for i in range(n_dim_lat):
percentage = int(n_dim_obs * 0.99)
Expand All @@ -352,7 +281,8 @@ def make_ell(n_dim_obs=100, n_dim_lat=10):
return L, K_HO


def make_starting(n_dim_obs=100, n_dim_lat=10, degree=2, normalize=False):
def make_starting(
n_dim_obs=100, n_dim_lat=10, degree=2, normalize=False, eps=1e-2):
"""Generate starting theta, theta_observed, L, K_HO."""
L, K_HO = make_ell(n_dim_obs, n_dim_lat)

Expand Down Expand Up @@ -388,10 +318,12 @@ def make_starting(n_dim_obs=100, n_dim_lat=10, degree=2, normalize=False):
indices = np.random.choice(possible_idx, n_choice)
theta[i, indices] = theta[indices, i] = .5 / degree

assert (is_pos_def(theta))
theta_observed = theta - L
assert (is_pos_def(theta_observed))
return theta, theta_observed, L, K_HO
if not is_pos_def(theta):
theta += np.diag(np.sum(theta, axis=1) + eps)
if not is_pos_def(theta - L):
theta += np.diag(eps + np.diag(L))
assert (is_pos_def(theta - L))
return theta, theta - L, L, K_HO


def _update_theta_l2(
Expand All @@ -413,7 +345,7 @@ def _update_theta_l2(
return theta


def _update_theta_l1(theta_init, no, n_dim_obs):
def _update_theta_l1(theta_init, no, n_dim_obs, eps=1e-2):
theta = theta_init.copy()
rows = np.zeros(no)
cols = np.zeros(no)
Expand All @@ -425,7 +357,8 @@ def _update_theta_l1(theta_init, no, n_dim_obs):
0.12, 0, 0
]) if theta[r, c] == 0 else .06 # np.random.rand(1) * .35
theta[c, r] = theta[r, c]
assert (is_pos_def(theta))
if not is_pos_def(theta):
theta += np.diag(np.sum(theta, axis=1) + eps)
return theta


Expand Down Expand Up @@ -493,7 +426,8 @@ def make_covariance(

assert np.linalg.matrix_rank(L) == n_dim_lat
assert (is_pos_semidef(L))
assert (is_pos_def(theta - L))
if not is_pos_def(theta - L):
theta += np.diag(epsilon + np.diag(L))

thetas.append(theta)
thetas_obs.append(theta - L)
Expand Down Expand Up @@ -720,7 +654,7 @@ def make_ma_xue_zou(


def make_ma_xue_zou_rand_k(
n_dim_obs=12, n_latent=3, T=1, epsilon=1e-3, sparsity=0.1):
n_dim_obs=12, n_latent=3, T=1, epsilon=1e-3, sparsity=0.1, **kwargs):
"""Generate the dataset as in Ma, Xue, Zou (2012)."""
# p = n_dim_obs + n_latent # int(n_dim_obs * 0.05)
p = n_dim_obs + int(n_dim_obs * 0.05)
Expand Down
4 changes: 2 additions & 2 deletions regain/datasets/ising.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import itertools

import numpy as np
import networkx as nx
import warnings


Expand All @@ -58,7 +57,7 @@ def ising_theta_generator(
n_dim_obs=10, n=100, T=10, mode='l1', time_on_axis='first', change=1,
responses=[-1, 1], random_graph='scale-free', probability=0.3,
degree=2):
"""Generates adjacency matix for Ising graphical model.
"""Generates adjacency matrix for Ising graphical model.
Parameters
----------
Expand Down Expand Up @@ -95,6 +94,7 @@ def ising_theta_generator(
list:
List of adjaceny matrix of length T.
"""
import networkx as nx
if random_graph.lower() == 'erdos-renyi':
graph = nx.random_graphs.fast_gnp_random_graph(n=n_dim_obs,
p=probability)
Expand Down
2 changes: 1 addition & 1 deletion regain/datasets/poisson.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import warnings

import networkx as nx
import numpy as np


Expand Down Expand Up @@ -88,6 +87,7 @@ def poisson_theta_generator(n_dim_obs=10,
list:
List of adjaceny matrix of length T.
"""
import networkx as nx
if random_graph.lower() == 'erdos-renyi':
graph = nx.random_graphs.fast_gnp_random_graph(n=n_dim_obs,
p=probability)
Expand Down
Loading

0 comments on commit 3f99857

Please sign in to comment.