From 4771d6e1b29145f4265f02b5736addbb0a6955f9 Mon Sep 17 00:00:00 2001 From: Marcel Falkiewicz Date: Wed, 27 Dec 2017 11:21:28 +0100 Subject: [PATCH 1/7] Fixed affinity matrix code for sklearn --- mapalign/embed.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/mapalign/embed.py b/mapalign/embed.py index 67662e4..c9e123f 100644 --- a/mapalign/embed.py +++ b/mapalign/embed.py @@ -345,7 +345,6 @@ def _get_affinity_matrix(self, X, Y=None): """ if self.affinity == 'precomputed': self.affinity_matrix_ = X - return self.affinity_matrix_ if self.affinity == 'nearest_neighbors': if sparse.issparse(X): warnings.warn("Nearest neighbors affinity currently does " @@ -362,21 +361,16 @@ def _get_affinity_matrix(self, X, Y=None): # currently only symmetric affinity_matrix supported self.affinity_matrix_ = 0.5 * (self.affinity_matrix_ + self.affinity_matrix_.T) - return self.affinity_matrix_ if self.affinity == 'rbf': self.gamma_ = (self.gamma if self.gamma is not None else 1.0 / X.shape[1]) self.affinity_matrix_ = rbf_kernel(X, gamma=self.gamma_) - return self.affinity_matrix_ if self.affinity in ['markov', 'cauchy']: from .dist import compute_affinity self.affinity_matrix_ = compute_affinity(X, method=self.affinity, eps=self.gamma, metric=self.metric) - return self.affinity_matrix_ - self.affinity_matrix_ = self.affinity(X) - return self.affinity_matrix_ def fit(self, X, y=None): """Fit the model from data in X. @@ -413,14 +407,14 @@ def fit(self, X, y=None): raise ValueError(("'affinity' is expected to be an affinity " "name or a callable. Got: %s") % self.affinity) - affinity_matrix = self._get_affinity_matrix(X) + self._get_affinity_matrix(X) if self.use_variant: - self.embedding_ = compute_diffusion_map_psd(affinity_matrix, + self.embedding_ = compute_diffusion_map_psd(self.affinity_matrix_, alpha=self.alpha, n_components=self.n_components, diffusion_time=self.diffusion_time) else: - self.embedding_ = compute_diffusion_map(affinity_matrix, + self.embedding_ = compute_diffusion_map(self.affinity_matrix_, alpha=self.alpha, n_components=self.n_components, diffusion_time=self.diffusion_time, From d74a4d149b5529c3832819aaa5c7b2140b427a2d Mon Sep 17 00:00:00 2001 From: Marcel Falkiewicz Date: Wed, 27 Dec 2017 11:53:38 +0100 Subject: [PATCH 2/7] Fixed imports --- mapalign/embed.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mapalign/embed.py b/mapalign/embed.py index c9e123f..a4b5543 100644 --- a/mapalign/embed.py +++ b/mapalign/embed.py @@ -2,9 +2,13 @@ """ import numpy as np +import scipy.sparse as sps + has_sklearn = True try: import sklearn + from sklearn.neighbors import kneighbors_graph + from sklearn.metrics.pairwise import rbf_kernel except ImportError: has_sklearn = False @@ -346,7 +350,7 @@ def _get_affinity_matrix(self, X, Y=None): if self.affinity == 'precomputed': self.affinity_matrix_ = X if self.affinity == 'nearest_neighbors': - if sparse.issparse(X): + if sps.issparse(X): warnings.warn("Nearest neighbors affinity currently does " "not support sparse input, falling back to " "rbf affinity") From 49c627f03a6c21b437ce3f14cff6ea897a540aa8 Mon Sep 17 00:00:00 2001 From: Marcel Falkiewicz Date: Thu, 28 Dec 2017 21:58:58 +0100 Subject: [PATCH 3/7] Changed eps calculation, added f_neighbors as parameter --- mapalign/dist.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mapalign/dist.py b/mapalign/dist.py index 0664c55..4590369 100644 --- a/mapalign/dist.py +++ b/mapalign/dist.py @@ -127,11 +127,12 @@ def compute_nearest_neighbor_graph(K, n_neighbors=50): return K -def compute_affinity(X, method='markov', eps=None, metric='euclidean'): +def compute_affinity(X, method='markov', f_neighbors = 0.01, eps=None, metric='euclidean'): """Compute the similarity or affinity matrix between the samples in X :param X: A set of samples with number of rows > 1 :param method: 'markov' or 'cauchy' kernel (default: markov) + :param f_neighbors: Fraction of nearest neighbors to consider for automatic eps calculation :param eps: scaling factor for kernel :param metric: metric to compute pairwise distances :return: a similarity matrix @@ -147,8 +148,8 @@ def compute_affinity(X, method='markov', eps=None, metric='euclidean'): import numpy as np D = squareform(pdist(X, metric=metric)) if eps is None: - k = int(max(2, np.round(D.shape[0] * 0.01))) - eps = 2 * np.median(np.sort(D, axis=0)[k+1, :])**2 + k = int(max(2, np.round(D.shape[0] * f_neighbors))) + eps = np.median(np.sort(D, axis=0)[0:k+1, :]**2) if method == 'markov': affinity_matrix = np.exp(-(D * D) / eps) elif method == 'cauchy': From ac9a191354d71b36d00c4c2b302b5577db9e89c9 Mon Sep 17 00:00:00 2001 From: Marcel Falkiewicz Date: Thu, 28 Dec 2017 22:02:06 +0100 Subject: [PATCH 4/7] Row sum 1 -> Col sum 1 --- mapalign/embed.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mapalign/embed.py b/mapalign/embed.py index a4b5543..1f89215 100644 --- a/mapalign/embed.py +++ b/mapalign/embed.py @@ -100,11 +100,11 @@ def compute_diffusion_map(L, alpha=0.5, n_components=None, diffusion_time=0, L_alpha = L_alpha * d_alpha[np.newaxis, :] # Step 3 - d_alpha = np.power(np.array(L_alpha.sum(axis=1)).flatten(), -1) + d_alpha = np.power(np.array(L_alpha.sum(axis=0)).flatten(), -1) if use_sparse: L_alpha.data *= d_alpha[L_alpha.indices] else: - L_alpha = d_alpha[:, np.newaxis] * L_alpha + L_alpha = d_alpha[np.newaxis, :] * L_alpha M = L_alpha @@ -445,3 +445,4 @@ def fit_transform(self, X, y=None): """ self.fit(X) return self.embedding_ + From a6975d85eadf141d19dd26baf10d5e269971dedc Mon Sep 17 00:00:00 2001 From: Marcel Falkiewicz Date: Mon, 1 Jan 2018 21:41:09 +0100 Subject: [PATCH 5/7] Revert "Row sum 1 -> Col sum 1" This reverts commit ac9a191354d71b36d00c4c2b302b5577db9e89c9. --- mapalign/embed.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mapalign/embed.py b/mapalign/embed.py index 1f89215..a4b5543 100644 --- a/mapalign/embed.py +++ b/mapalign/embed.py @@ -100,11 +100,11 @@ def compute_diffusion_map(L, alpha=0.5, n_components=None, diffusion_time=0, L_alpha = L_alpha * d_alpha[np.newaxis, :] # Step 3 - d_alpha = np.power(np.array(L_alpha.sum(axis=0)).flatten(), -1) + d_alpha = np.power(np.array(L_alpha.sum(axis=1)).flatten(), -1) if use_sparse: L_alpha.data *= d_alpha[L_alpha.indices] else: - L_alpha = d_alpha[np.newaxis, :] * L_alpha + L_alpha = d_alpha[:, np.newaxis] * L_alpha M = L_alpha @@ -445,4 +445,3 @@ def fit_transform(self, X, y=None): """ self.fit(X) return self.embedding_ - From f5bd9504018a5110a9cbd3524aca4ab9f4a398c9 Mon Sep 17 00:00:00 2001 From: Marcel Falkiewicz Date: Mon, 1 Jan 2018 22:23:27 +0100 Subject: [PATCH 6/7] Moved markov matrix computation outside of main function --- mapalign/embed.py | 57 ++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/mapalign/embed.py b/mapalign/embed.py index a4b5543..cc3f0a0 100644 --- a/mapalign/embed.py +++ b/mapalign/embed.py @@ -81,32 +81,8 @@ def compute_diffusion_map(L, alpha=0.5, n_components=None, diffusion_time=0, raise ImportError('Checks require scikit-learn, but not found') ndim = L.shape[0] - if overwrite: - L_alpha = L - else: - L_alpha = L.copy() - if alpha > 0: - # Step 2 - d = np.array(L_alpha.sum(axis=1)).flatten() - d_alpha = np.power(d, -alpha) - if use_sparse: - L_alpha.data *= d_alpha[L_alpha.indices] - L_alpha = sps.csr_matrix(L_alpha.transpose().toarray()) - L_alpha.data *= d_alpha[L_alpha.indices] - L_alpha = sps.csr_matrix(L_alpha.transpose().toarray()) - else: - L_alpha = d_alpha[:, np.newaxis] * L_alpha - L_alpha = L_alpha * d_alpha[np.newaxis, :] - - # Step 3 - d_alpha = np.power(np.array(L_alpha.sum(axis=1)).flatten(), -1) - if use_sparse: - L_alpha.data *= d_alpha[L_alpha.indices] - else: - L_alpha = d_alpha[:, np.newaxis] * L_alpha - - M = L_alpha + M = _compute_markov_matrix(L, use_sparse = use_sparse, alpha = alpha) from scipy.sparse.linalg import eigs, eigsh if eigen_solver is None: @@ -134,6 +110,37 @@ def compute_diffusion_map(L, alpha=0.5, n_components=None, diffusion_time=0, return_result) +def _compute_markov_matrix(A, use_sparse, alpha = 0): + """ + Computes Markov matrix from affinity matrix. Density normalization is optional. + + :param A: Affinity matrix + :param alpha: Diffusion operator parameter + :return: Transition matrix + """ + + if alpha > 0: + # Step 2 + d = np.array(A.sum(axis=1)).flatten() + d_alpha = np.power(d, -alpha) + if use_sparse: + A.data *= d_alpha[A.indices] + A = sps.csr_matrix(A.transpose().toarray()) + A.data *= d_alpha[A.indices] + A = sps.csr_matrix(A.transpose().toarray()) + else: + A = d_alpha[:, np.newaxis] * A + A = A * d_alpha[np.newaxis, :] + + # Step 3 + d_alpha = np.power(np.array(A.sum(axis=1)).flatten(), -1) + if use_sparse: + A.data *= d_alpha[A.indices] + else: + A = d_alpha[:, np.newaxis] * A + + return A + def _step_5(lambdas, vectors, ndim, n_components, diffusion_time, return_result): """ This is a helper function for diffusion map computation. From 43e0b1563e1fefbc9e82a369b8a21d51cbb2a7de Mon Sep 17 00:00:00 2001 From: Marcel Falkiewicz Date: Tue, 2 Jan 2018 19:50:41 +0100 Subject: [PATCH 7/7] Return affinity matrix, but not as part of self --- mapalign/embed.py | 82 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 5 deletions(-) diff --git a/mapalign/embed.py b/mapalign/embed.py index cc3f0a0..d6f98b6 100644 --- a/mapalign/embed.py +++ b/mapalign/embed.py @@ -366,23 +366,25 @@ def _get_affinity_matrix(self, X, Y=None): self.n_neighbors_ = (self.n_neighbors if self.n_neighbors is not None else max(int(X.shape[0] / 10), 1)) - self.affinity_matrix_ = kneighbors_graph(X, self.n_neighbors_, + affinity_matrix_ = kneighbors_graph(X, self.n_neighbors_, include_self=True, n_jobs=self.n_jobs) # currently only symmetric affinity_matrix supported - self.affinity_matrix_ = 0.5 * (self.affinity_matrix_ + + affinity_matrix_ = 0.5 * (self.affinity_matrix_ + self.affinity_matrix_.T) if self.affinity == 'rbf': self.gamma_ = (self.gamma if self.gamma is not None else 1.0 / X.shape[1]) - self.affinity_matrix_ = rbf_kernel(X, gamma=self.gamma_) + affinity_matrix_ = rbf_kernel(X, gamma=self.gamma_) if self.affinity in ['markov', 'cauchy']: from .dist import compute_affinity - self.affinity_matrix_ = compute_affinity(X, + affinity_matrix_ = compute_affinity(X, method=self.affinity, eps=self.gamma, metric=self.metric) + return affinity_matrix_ + def fit(self, X, y=None): """Fit the model from data in X. @@ -418,7 +420,8 @@ def fit(self, X, y=None): raise ValueError(("'affinity' is expected to be an affinity " "name or a callable. Got: %s") % self.affinity) - self._get_affinity_matrix(X) + self.affinity_ = self._get_affinity_matrix(X) + if self.use_variant: self.embedding_ = compute_diffusion_map_psd(self.affinity_matrix_, alpha=self.alpha, @@ -452,3 +455,72 @@ def fit_transform(self, X, y=None): """ self.fit(X) return self.embedding_ + + class AlternatingDiffusion(DiffusionMapEmbedding): + """Alternating diffusion framework that captures common souces of variability acrossdatasets. + """ + + def _diffuse(self, K, t): + """ + Moves Markov kernel K t steps forward in time. + + :param K: Markov kernel + :param t: time + :return: kernel moved forward in time + """ + + return np.power(K, t) + + + def fit(self, D1, D2, time): + """ + + :param D1: Dataset 1 + :param D2: Dataset 2 + :param time: Time steps for simple diffusion + :return: self + """ + + from sklearn.utils import check_array, check_random_state + D1 = check_array(D1, ensure_min_samples=2, estimator=self) + D2 = check_array(D2, ensure_min_samples=2, estimator=self) + + random_state = check_random_state(self.random_state) + if isinstance(self.affinity, (str,)): + if self.affinity not in set(("nearest_neighbors", "rbf", + "markov", "cauchy", + "precomputed")): + raise ValueError(("%s is not a valid affinity. Expected " + "'precomputed', 'rbf', 'nearest_neighbors' " + "or a callable.") % self.affinity) + elif not callable(self.affinity): + raise ValueError(("'affinity' is expected to be an affinity " + "name or a callable. Got: %s") % self.affinity) + + self.affinity_D1_ = self._get_affinity_matrix(D1) + self.affinity_D2_ = self._get_affinity_matrix(D2) + + self.Markov_D1_ = _compute_markov_matrix(self.affinity_D1_, use_sparse = False, alpha = self.alpha) + self.Markov_D2_ = _compute_markov_matrix(self.affinity_D2_, use_sparse = False, alpha = self.alpha) + + self.Markov_D1_t_ = self._diffuse(self.Markov_D1_, time) + self.Markov_D2_t_ = self._diffuse(self.Markov_D2_, time) + + # Create a joint kernel + + self.joint_kernel_ = self.Markov_D1_t_.dot(self.Markov_D2_t_) + + # Final embedding + + self.embedding_ = compute_diffusion_map(self.joint_kernel_, self.alpha, + n_components=self.n_components, + eigen_solver=self.eigen_solver) + + return self + + + def fit_transform(self, D1, D2, time): + + self.fit(D1, D2, time) + return self.embedding_ +