Skip to content

Commit

Permalink
fixed bugs in data_graph, dpt and tsne
Browse files Browse the repository at this point in the history
  • Loading branch information
falexwolf committed Jul 27, 2017
1 parent f0cfad6 commit 86330a9
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 19 deletions.
29 changes: 15 additions & 14 deletions scanpy/data_structs/data_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,19 @@ def __init__(self,
recompute_graph=None,
flavor='haghverdi16'):
self.sym = True # we do not allow asymmetric cases
self.flavor = flavor # this is to experiment around
self.n_pcs = n_pcs
self.n_dcs = n_dcs
# use the graph in adata
if (not recompute_graph
# make sure X_diffmap is there
and 'X_diffmap' in adata.smp
and adata.smp['X_diffmap'].shape[1] >= n_dcs-1):
self.n_pcs = n_pcs
self.n_dcs = n_dcs
# make sure enough DCs are there
and adata.smp['X_diffmap'].shape[1] >= n_dcs-1
# make sure the sparsity of the stored graph matches `knn`
and issparse(adata.add['Ktilde']) == knn
# make sure n_neighbors matches
and k == adata.add['distance'][0].nonzero()[0].size + 1):
self.init_iroot_directly(adata)
self.X = adata.X # this is a hack, PCA?
self.knn = issparse(adata.add['Ktilde'])
Expand All @@ -188,7 +195,7 @@ def __init__(self,
if self.knn:
self.k = adata.add['distance'][0].nonzero()[0].size + 1
else:
self.k = adata.X.shape[0]
self.k = None # currently do not store this, is unknown
# for output of spectrum
self.X_diffmap = adata.smp['X_diffmap'][:, :n_dcs-1]
self.evals = np.r_[1, adata.add['diffmap_evals'][:n_dcs-1]]
Expand All @@ -205,7 +212,7 @@ def __init__(self,
# recompute the graph
else:
self.k = k if k is not None else 30
logg.info('compute data graph with `n_neighbors={}`'
logg.info(' computing data graph with n_neighbors = {} '
.format(self.k))
self.evals = None
self.rbasis = None
Expand All @@ -214,13 +221,8 @@ def __init__(self,
self.Dsq = None
self.knn = knn
self.n_jobs = sett.n_jobs if n_jobs is None else n_jobs
self.n_pcs = n_pcs
self.n_dcs = n_dcs
self.flavor = flavor # this is to experiment around
self.iroot = None
self.X = adata.X # might be overwritten with X_pca below
self.Dchosen = None
self.M = None
self.init_iroot_and_X_from_PCA(adata, recompute_pca, n_pcs)
if False: # TODO
# in case we already computed distance relations
Expand All @@ -233,16 +235,15 @@ def __init__(self,
self.Dsq = adata.add['distance']

def init_iroot_directly(self, adata):
self.iroot = None
if 'iroot' in adata.add:
if adata.add['iroot'] >= adata.n_smps:
logg.warn('Root cell index {} does not exist for {} samples. '
'Is ignored.'
.format(adata.add['iroot'], adata.n_smps))
self.iroot = None
else:
self.iroot = adata.add['iroot']



def init_iroot_and_X_from_PCA(self, adata, recompute_pca, n_pcs):
# retrieve xroot
xroot = None
Expand Down Expand Up @@ -281,7 +282,7 @@ def update_diffmap(self, n_comps=None):
self.n_dcs = n_comps
logg.info(' updating number of DCs to', self.n_dcs)
if self.evals is None or self.evals.size < self.n_dcs:
logg.info('computing spectral decomposition ("diffmap") with',
logg.info(' computing spectral decomposition ("diffmap") with',
self.n_dcs, 'components', r=True)
self.compute_transition_matrix()
self.embed(n_evals=self.n_dcs)
Expand Down
4 changes: 2 additions & 2 deletions scanpy/tools/dpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def dpt(adata, n_branchings=0, n_neighbors=30, knn=True, n_pcs=50, n_dcs=10,
logg.hint(msg)
if n_branchings == 0:
logg.m('set parameter `n_branchings` > 0 to detect branchings', v='hint')
logg.m('perform Diffusion Pseudotime analysis', r=True)
dpt = DPT(adata, n_neighbors=n_neighbors, knn=knn, n_pcs=n_pcs, n_dcs=n_dcs,
min_group_size=min_group_size, n_jobs=n_jobs,
recompute_graph=recompute_graph, recompute_pca=recompute_pca,
Expand All @@ -118,8 +119,7 @@ def dpt(adata, n_branchings=0, n_neighbors=30, knn=True, n_pcs=50, n_dcs=10,
adata.add['diffmap_evals'] = dpt.evals[1:]
if knn: adata.add['distance'] = dpt.Dsq
if knn: adata.add['Ktilde'] = dpt.Ktilde
logg.m('perform Diffusion Pseudotime analysis', r=True)
if n_branchings > 1: logg.info('... this uses a hierarchical implementation')
if n_branchings > 1: logg.info(' this uses a hierarchical implementation')
# compute DPT distance matrix, which we refer to as 'Ddiff'
if dpt.iroot is not None:
dpt.set_pseudotime() # pseudotimes are distances from root point
Expand Down
6 changes: 3 additions & 3 deletions scanpy/tools/tsne.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def tsne(adata, random_state=0, n_pcs=50, perplexity=30, learning_rate=None,
Multicore-TSNE
GitHub (2017)
"""
logg.info('compute tSNE', r=True)
logg.info('computing tSNE', r=True)
adata = adata.copy() if copy else adata
# preprocessing by PCA
if 'X_pca' in adata.smp and adata.smp['X_pca'].shape[1] >= n_pcs:
Expand All @@ -74,7 +74,7 @@ def tsne(adata, random_state=0, n_pcs=50, perplexity=30, learning_rate=None,
else:
if n_pcs > 0 and adata.X.shape[1] > n_pcs:
logg.info(' preprocess using PCA with', n_pcs, 'PCs')
logg.info('avoid this by setting n_pcs = 0', v='hint')
logg.hint('avoid this by setting n_pcs = 0')
X = pca(adata.X, random_state=random_state, n_comps=n_pcs)
adata.smp['X_pca'] = X
logg.info(' using', n_pcs, 'principal components')
Expand All @@ -95,7 +95,7 @@ def tsne(adata, random_state=0, n_pcs=50, perplexity=30, learning_rate=None,
from MulticoreTSNE import MulticoreTSNE as TSNE
params_sklearn['learning_rate'] = 200 if learning_rate is None else learning_rate
tsne = TSNE(n_jobs=n_jobs, **params_sklearn)
logg.info(' using package MulticoreTSNE')
logg.info(' using package MulticoreTSNE by D. Ulyanov')
X_tsne = tsne.fit_transform(X.astype(np.float64))
except ImportError:
multicore_failed = True
Expand Down

0 comments on commit 86330a9

Please sign in to comment.