You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Change niche flavor to cellcharter_simple and default distance = 3 (#978)
* Change niche flavor to cellcharter_simple and default distance = 3
* [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
* added warning message
---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Tim Treis <tim.treis@helmholtz-munich.de>
Co-authored-by: Tim Treis <tim.treis@stud.uni-heidelberg.de>
Method to use for niche calculation. Available options are:
59
60
- `{fla.NEIGHBORHOOD.s!r}` - cluster the neighborhood profile.
60
61
- `{fla.UTAG.s!r}` - use utag algorithm (matrix multiplication).
61
-
- `{fla.CELLCHARTER.s!r}` - cluster adjacency matrix with Gaussian Mixture Model (GMM) using CellCharter's approach.
62
+
- `{fla.CELLCHARTER.s!r}` - a simplified version of CellCharter's approach, using PCA for dimensionality reduction. An arbitrary embedding can be used instead of PCA by setting the `use_rep` parameter to the key of an embedding stored in `adata.obsm`.
62
63
%(library_key)s
63
64
If provided, niches will be calculated separately for each unique value in this column.
64
65
Each niche will be prefixed with the library identifier.
@@ -103,6 +104,9 @@ def calculate_niche(
103
104
Optional if flavor == `{fla.CELLCHARTER.s!r}`.
104
105
spatial_connectivities_key
105
106
Key in `adata.obsp` where spatial connectivities are stored.
107
+
use_rep
108
+
Key in `adata.obsm` where the embedding is stored. If provided, this embedding will be used instead of PCA for dimensionality reduction.
109
+
Optional if flavor == `{fla.CELLCHARTER.s!r}`.
106
110
inplace
107
111
If 'True', perform the operation in place.
108
112
If 'False', return a new AnnData object with the niche labels.
@@ -111,6 +115,12 @@ def calculate_niche(
111
115
ifflavor=="cellcharter"andaggregationisNone:
112
116
aggregation="mean"
113
117
118
+
ifdistanceisNone:
119
+
distance=3ifflavor=="cellcharter"else1
120
+
121
+
ifflavor=="cellcharter"andn_componentsisNone:
122
+
n_components=10
123
+
114
124
_validate_niche_args(
115
125
data,
116
126
flavor,
@@ -127,15 +137,13 @@ def calculate_niche(
127
137
aggregation,
128
138
n_components,
129
139
random_state,
140
+
use_rep,
130
141
inplace,
131
142
)
132
143
133
144
ifresolutionsisNone:
134
145
resolutions= [0.5]
135
146
136
-
ifdistanceisNone:
137
-
distance=1
138
-
139
147
ifisinstance(data, SpatialData):
140
148
orig_adata=data.tables[table_key]
141
149
adata=orig_adata.copy()
@@ -225,6 +233,7 @@ def calculate_niche(
225
233
n_components,
226
234
random_state,
227
235
spatial_connectivities_key,
236
+
use_rep,
228
237
)
229
238
230
239
ifnotinplace:
@@ -293,6 +302,7 @@ def _calculate_niches(
293
302
n_components: int|None,
294
303
random_state: int,
295
304
spatial_connectivities_key: str,
305
+
use_rep: str|None,
296
306
) ->None:
297
307
"""Calculate niches using the specified flavor and parameters."""
298
308
ifflavor=="neighborhood":
@@ -321,6 +331,7 @@ def _calculate_niches(
321
331
n_components,
322
332
random_state,
323
333
spatial_connectivities_key,
334
+
use_rep,
324
335
)
325
336
326
337
@@ -470,6 +481,7 @@ def _get_cellcharter_niches(
470
481
n_components: int,
471
482
random_state: int,
472
483
spatial_connectivities_key: str,
484
+
use_rep: str|None=None,
473
485
) ->None:
474
486
"""adapted from https://github.com/CSOgroup/cellcharter/blob/main/src/cellcharter/gr/_aggr.py
475
487
and https://github.com/CSOgroup/cellcharter/blob/main/src/cellcharter/tl/_gmm.py"""
concatenated_matrix=hstack(aggregated_matrices) # Stack all matrices horizontally
496
508
arr=concatenated_matrix.toarray() # Densify
497
-
arr_ad=ad.AnnData(X=arr)
498
-
sc.tl.pca(arr_ad)
509
+
510
+
ifuse_repisnotNone:
511
+
# Use provided embedding from adata.obsm
512
+
ifuse_repnotinadata.obsm:
513
+
raiseKeyError(
514
+
f"Embedding key '{use_rep}' not found in adata.obsm. Available keys: {list(adata.obsm.keys())}"
515
+
)
516
+
embedding=adata.obsm[use_rep]
517
+
# Ensure embedding has the right number of components
518
+
ifembedding.shape[1] <n_components:
519
+
raiseValueError(
520
+
f"Embedding has {embedding.shape[1]} components, but n_components={n_components}. Please provide an embedding with at least {n_components} components."
521
+
)
522
+
# Use only the first n_components
523
+
embedding=embedding[:, :n_components]
524
+
else:
525
+
logg.warning(
526
+
"CellCharter recommends to use a dimensionality reduced embedding of the data, e.g. a scVI embedding. Since 'use_rep' is not provided, PCA will be used as proxy - performance may be suboptimal."
527
+
)
528
+
529
+
arr_ad=ad.AnnData(X=arr)
530
+
sc.tl.pca(arr_ad)
531
+
embedding=arr_ad.obsm["X_pca"]
499
532
500
533
# cluster concatenated matrix with GMM, each cluster label equals to a niche label
0 commit comments