From eca7eeecd5b93bb6be1efbfee62a111025039de7 Mon Sep 17 00:00:00 2001 From: ICB Anna Schaar Date: Wed, 23 Mar 2022 16:02:18 +0100 Subject: [PATCH 1/4] add annotation to figures --- ncem/interpretation/interpreter.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ncem/interpretation/interpreter.py b/ncem/interpretation/interpreter.py index 924badc8..f8f740ff 100644 --- a/ncem/interpretation/interpreter.py +++ b/ncem/interpretation/interpreter.py @@ -1209,8 +1209,8 @@ def sender_receiver_effect_vulcanoplot( ax.set_xlim((-vmax*1.1, vmax*1.1)) ax.set_ylim((-0.5, 15)) - ax.set_xlabel('') - ax.set_ylabel('') + ax.set_xlabel('$\log$ fold change') + ax.set_ylabel('$-\log_{10}$ FDR-corrected pvalues') plt.axvline(-fold_change_threshold, color='black', linestyle='--', ) plt.axvline(fold_change_threshold, color='black', linestyle='--', ) plt.axhline(-np.log10(significance_threshold), linestyle='--', color='black') @@ -1275,7 +1275,7 @@ def sender_effect( sc.set_figure_params(scanpy=True, fontsize=fontsize) if plot_mode == 'qvals': - arr = np.log(self.qvalues[receiver_idx, :, :]) + arr = np.log(self.qvalues[receiver_idx, :, :].copy()) arr[arr < cut_pvals] = cut_pvals df = pd.DataFrame( arr, @@ -1293,7 +1293,7 @@ def sender_effect( cmap='Greys_r', vmin=-5, vmax=0. ) elif plot_mode == 'fold_change': - arr = self.fold_change[receiver_idx, :, :] + arr = self.fold_change[receiver_idx, :, :].copy() arr[np.where(self.qvalues[receiver_idx, :, :] > significance_threshold)] = 0 df = pd.DataFrame( arr, @@ -1308,7 +1308,7 @@ def sender_effect( fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize) sns.heatmap( df.T, - cbar_kws={'label': "fold change", + cbar_kws={'label': "$\log$ fold change", "location": "top"}, cmap="seismic", vmin=-vmax, vmax=vmax, ) @@ -1373,7 +1373,7 @@ def receiver_effect( fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize) sns.heatmap( df.T, - cbar_kws={'label': "fold change", + cbar_kws={'label': "$\log$ fold change", "location": "top"}, cmap="seismic", vmin=-vmax, vmax=vmax, ) From 3c56c9abbd05379f29dc4510bbe2a2e720e7d859 Mon Sep 17 00:00:00 2001 From: ICB Anna Schaar Date: Wed, 23 Mar 2022 16:08:25 +0100 Subject: [PATCH 2/4] add annotation to figures --- ncem/interpretation/interpreter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ncem/interpretation/interpreter.py b/ncem/interpretation/interpreter.py index f8f740ff..00e16d78 100644 --- a/ncem/interpretation/interpreter.py +++ b/ncem/interpretation/interpreter.py @@ -1174,7 +1174,7 @@ def sender_receiver_effect_vulcanoplot( # only significant ones qval_filter = np.where(self.qvalues[receiver_idx,sender_idx,:]>=significance_threshold) vmax = np.max(np.abs(self.fold_change[receiver_idx,sender_idx,:])) - print(vmax) + #print(vmax) # overlaying significant ones with orange sns.scatterplot( From 8fb817864eed6929c1dfc52c246ef5f53dbf293f Mon Sep 17 00:00:00 2001 From: ICB Anna Schaar Date: Wed, 18 May 2022 10:23:37 +0200 Subject: [PATCH 3/4] scog workshop commit --- ncem/data.py | 161 ++++++++++++++++++++++++++++- ncem/interpretation/interpreter.py | 46 +++++---- ncem/utils/wald_test.py | 4 +- 3 files changed, 187 insertions(+), 24 deletions(-) diff --git a/ncem/data.py b/ncem/data.py index fb84860e..ae94d1fe 100644 --- a/ncem/data.py +++ b/ncem/data.py @@ -21,6 +21,51 @@ from tqdm import tqdm +def get_data_custom(interpreter, n_eval_nodes_per_graph: int=10): + interpreter.undefined_node_types = None + interpreter.img_to_patient_dict = interpreter.data.celldata.uns["img_to_patient_dict"] + interpreter.complete_img_keys = list(interpreter.data.img_celldata.keys()) + + interpreter.a = {k: adata.obsp["adjacency_matrix_connectivities"] for k, adata in interpreter.data.img_celldata.items()} + interpreter.h_0 = {k: adata.obsm["node_types"] for k, adata in interpreter.data.img_celldata.items()} + interpreter.h_1 = {k: adata.X for k, adata in interpreter.data.img_celldata.items()} + interpreter.node_types = {k: adata.obsm["node_types"] for k, adata in interpreter.data.img_celldata.items()} + interpreter.node_type_names = interpreter.data.celldata.uns["node_type_names"] + interpreter.n_features_type = list(interpreter.node_types.values())[0].shape[1] + interpreter.n_features_standard = interpreter.data.celldata.shape[1] + interpreter.node_feature_names = list(interpreter.data.celldata.var_names) + interpreter.size_factors = interpreter.data.size_factors() + + # Add covariates: + # Add graph-level covariate information + interpreter.graph_covar_names = interpreter.data.celldata.uns["graph_covariates"]["label_names"] + + interpreter.graph_covar = {k: np.array([], ndmin=1) for k, adata in interpreter.data.img_celldata.items()} + # Add node-level conditional information + interpreter.node_covar = {k: np.empty((adata.shape[0], 0)) for k, adata in interpreter.data.img_celldata.items()} + + # Set selection-specific tensor dimensions: + interpreter.n_features_0 = list(interpreter.h_0.values())[0].shape[1] + interpreter.n_features_1 = list(interpreter.h_1.values())[0].shape[1] + interpreter.n_graph_covariates = list(interpreter.graph_covar.values())[0].shape[0] + interpreter.n_node_covariates = list(interpreter.node_covar.values())[0].shape[1] + interpreter.max_nodes = max([interpreter.a[i].shape[0] for i in interpreter.complete_img_keys]) + + interpreter.domains = {key: i for i, key in enumerate(interpreter.complete_img_keys)} + interpreter.n_domains = len(np.unique(list(interpreter.domains.values()))) + + # Report summary statistics of loaded graph: + print( + "Mean of mean node degree per images across images: %f" + % np.mean([np.mean(v.sum(axis=1)) for k, v in interpreter.a.items()]) + ) + + # splitting data into test and validation sets, can be ignored for non sender-receiver focused analysis + interpreter.split_data_node(0.1, 0.1) + interpreter.n_eval_nodes_per_graph = n_eval_nodes_per_graph + interpreter.cell_names = list(interpreter.data.celldata.uns['node_type_names'].values()) + + class GraphTools: """GraphTools class.""" @@ -756,7 +801,7 @@ def compute_cluster_enrichment( adata, adata_substates, log_pval, fold_change """ titles = list(self.celldata.uns["node_type_names"].values()) - sorce_type_names = [f"source type {x.replace('_', ' ')}" for x in titles] + sorce_type_names = [f"source type {x}" for x in titles] pbar_total = len(self.img_celldata.keys()) + len(self.img_celldata.keys()) + len(titles) with tqdm(total=pbar_total) as pbar: @@ -1761,6 +1806,120 @@ def size_factors(self): @property def var_names(self): return self.celldata.var_names + + +class customLoader(DataLoader): + + def __init__( + self, + adata, + cluster, + patient, + library_id, + radius, + coord_type='generic', + n_rings=1, + n_top_genes=None, + label_selection=None + ): + self.adata = adata.copy() + self.cluster = cluster + self.patient = patient + self.library_id = library_id + + print("Loading data from raw files") + self.register_celldata(n_top_genes=n_top_genes) + self.register_img_celldata() + self.register_graph_features(label_selection=label_selection) + self.compute_adjacency_matrices(radius=radius, coord_type=coord_type, n_rings=n_rings) + self.radius = radius + + print( + "Loaded %i images with complete data from %i patients " + "over %i cells with %i cell features and %i distinct celltypes." + % ( + len(self.img_celldata), + len(self.patients), + self.celldata.shape[0], + self.celldata.shape[1], + len(self.celldata.uns["node_type_names"]), + ) + ) + + def _register_celldata(self, n_top_genes): + + metadata = { + "cluster_col_preprocessed": self.cluster, + "image_col": self.library_id + } + + celldata = self.adata.copy() + celldata.X = celldata.X.toarray() + celldata.uns["metadata"] = metadata + del celldata.uns['spatial'] + + # register node type names + node_type_names = list(np.unique(celldata.obs[self.cluster])) + celldata.uns["node_type_names"] = {x: x for x in node_type_names} + node_types = np.zeros((celldata.shape[0], len(node_type_names))) + node_type_idx = np.array( + [ + node_type_names.index(x) for x in celldata.obs[self.cluster].values + ] # index in encoding vector + ) + node_types[np.arange(0, node_type_idx.shape[0]), node_type_idx] = 1 + celldata.obsm["node_types"] = node_types + + if self.patient: + img_to_patient_dict = {} + for p in np.unique(celldata.obs[self.patient]): + for i in np.unique(celldata.obs[celldata.obs[self.patient] == p][self.library_id]): + img_to_patient_dict[i] = p + else: + img_to_patient_dict = {"image": "patient"} + celldata.uns["img_to_patient_dict"] = img_to_patient_dict + self.img_to_patient_dict = img_to_patient_dict + + self.celldata = celldata + + def _register_img_celldata(self): + """Load dictionary of of image-wise celldata objects with {imgage key : anndata object of image}.""" + img_celldata = {} + if self.library_id: + for k in np.unique(self.celldata.obs[self.library_id]): + img_celldata[str(k)] = self.celldata[self.celldata.obs[self.library_id] == k].copy() + self.img_celldata = img_celldata + else: + self.img_celldata = {"image": self.celldata} + + def _register_graph_features(self, label_selection): + """Load graph level covariates. + + Parameters + ---------- + label_selection + Label selection. + """ + # Save processed data to attributes. + for adata in self.img_celldata.values(): + graph_covariates = { + "label_names": {}, + "label_tensors": {}, + "label_selection": [], + "continuous_mean": {}, + "continuous_std": {}, + "label_data_types": {}, + } + adata.uns["graph_covariates"] = graph_covariates + + graph_covariates = { + "label_names": {}, + "label_selection": [], + "continuous_mean": {}, + "continuous_std": {}, + "label_data_types": {}, + } + self.celldata.uns["graph_covariates"] = graph_covariates class DataLoaderZhang(DataLoader): diff --git a/ncem/interpretation/interpreter.py b/ncem/interpretation/interpreter.py index 00e16d78..e395b228 100644 --- a/ncem/interpretation/interpreter.py +++ b/ncem/interpretation/interpreter.py @@ -992,7 +992,7 @@ def type_coupling_analysis_circular( fontsize: Optional[int] = None, figsize: Tuple[float, float] = (9, 8), de_genes_threshold: float = 0, - magnitude_threshold: Optional[float] = None, + #magnitude_threshold: Optional[float] = None, save: Optional[str] = None, suffix: str = "_type_coupling_analysis_circular.pdf", show: bool = True, @@ -1021,6 +1021,8 @@ def type_coupling_analysis_circular( (np.abs(x) - np.min(np.abs(network_df[0].values))) / (np.max(np.abs(network_df[0].values)) - np.min(np.abs(network_df[0].values))) for x in network_df[0].values] + network_df["de_genes_abs"] = network_df[0] + if undefined_types: network_df = network_df[network_df['receiver'] != undefined_types] network_df = network_df[network_df['sender'] != undefined_types] @@ -1030,18 +1032,14 @@ def type_coupling_analysis_circular( plt.ioff() fig, ax = plt.subplots(nrows=1, ncols=1, figsize=figsize) ax.axis('off') - if de_genes_threshold: - network_df = network_df[network_df[0]>de_genes_threshold] - if magnitude_threshold: - network_df = network_df[network_df['magnitude']>magnitude_threshold] G=nx.from_pandas_edgelist( network_df, source='sender', target='receiver', - edge_attr=["magnitude", 'de_genes'], + edge_attr=["magnitude", 'de_genes', 'de_genes_abs'], create_using=nx.DiGraph() ) nodes = np.unique(network_df['receiver']) pos=nx.circular_layout(G) - labels_width = nx.get_edge_attributes(G, edge_attr) + if len(nodes) <= 10: nx.set_node_attributes(G, dict([(x,vega_10_scanpy[i]) for i, x in enumerate(nodes)]), "color") elif len(nodes) <= 20: @@ -1069,16 +1067,18 @@ def type_coupling_analysis_circular( for node, t in description.items(): bb = t.get_window_extent(renderer=r) bbdata = bb.transformed(trans) - radius = text_space +bbdata.width/1. + radius = text_space +bbdata.width/2. position = (radius*np.cos(angle_dict[node]),radius* np.sin(angle_dict[node])) t.set_position(position) t.set_rotation(angle_dict[node]*360.0/(2.0*np.pi)) t.set_clip_on(False) - + + selected_edges = [(u,v) for u,v,e in G.edges(data=True) if e['de_genes_abs'] > de_genes_threshold] + width = [e[edge_attr] * edge_width_scale for u,v,e in G.edges(data=True) if e['de_genes_abs'] > de_genes_threshold] + nx.draw_networkx( - G, pos, with_labels=False, node_size=500, - width=[x * edge_width_scale for x in list(labels_width.values())], - edge_vmin=0., edge_vmax=1., edge_cmap=plt.cm.seismic, arrowstyle='-|>', + G, pos, with_labels=False, node_size=500, edgelist=selected_edges, + width=width, edge_vmin=0., edge_vmax=1., edge_cmap=plt.cm.seismic, arrowstyle='-|>', vmin=0., vmax=1., cmap=plt.cm.binary, node_color=list(node_color.values()), ax=ax, connectionstyle='arc3, rad = 0.1' ) @@ -1089,6 +1089,7 @@ def type_coupling_analysis_circular( plt.show() plt.close(fig) plt.ion() + return network_df def sender_receiver_values( self, @@ -1112,6 +1113,7 @@ def sender_receiver_values( np.array([means, pvals, qvals, fold_change]).T, index=self.data.celldata.var_names, columns=['mean expression', 'pvalue', 'qvalue', 'fold change'] ) + df['-log 10 qvalue'] = -np.log10(df['qvalue']) return df @@ -1170,7 +1172,7 @@ def sender_receiver_effect_vulcanoplot( sc.set_figure_params(scanpy=True, fontsize=fontsize) plt.rcParams['axes.grid'] = False fig, ax = plt.subplots(1,1, figsize=figsize) - + ax.grid(False) # only significant ones qval_filter = np.where(self.qvalues[receiver_idx,sender_idx,:]>=significance_threshold) vmax = np.max(np.abs(self.fold_change[receiver_idx,sender_idx,:])) @@ -1208,7 +1210,7 @@ def sender_receiver_effect_vulcanoplot( color='red', edgecolor = 'black', s=100, ax=ax) ax.set_xlim((-vmax*1.1, vmax*1.1)) - ax.set_ylim((-0.5, 15)) + #ax.set_ylim((-0.5, np.min([y[fc_filter], 15]))) ax.set_xlabel('$\log$ fold change') ax.set_ylabel('$-\log_{10}$ FDR-corrected pvalues') plt.axvline(-fold_change_threshold, color='black', linestyle='--', ) @@ -1340,7 +1342,7 @@ def receiver_effect( sc.set_figure_params(scanpy=True, fontsize=fontsize) if plot_mode == 'qvals': - arr = np.log(self.qvalues[:, sender_idx, :]) + arr = np.log(self.qvalues[sender_idx, :, :].copy()) arr[arr < cut_pvals] = cut_pvals df = pd.DataFrame( arr, @@ -1358,7 +1360,7 @@ def receiver_effect( cmap='Greys_r', vmin=-5, vmax=0. ) elif plot_mode == 'fold_change': - arr = self.fold_change[:, sender_idx, :] + arr = self.fold_change[sender_idx, :, :].copy() arr[np.where(self.qvalues[:, sender_idx, :] > significance_threshold)] = 0 df = pd.DataFrame( arr, @@ -1933,7 +1935,7 @@ def get_sender_receiver_effects( "proportions": self.data.celldata.obsm['proportions'] } target = np.asarray(dmatrix("target-1", data)) - interaction_shape = self.model.training_model.inputs[1].shape + interaction_shape = np.int(self.n_features_0**2) interactions = np.asarray(dmatrix("target:proportions-1", data)) y = self.data.celldata.X[self.nodes_idx_all['1'],:] @@ -1957,11 +1959,11 @@ def get_sender_receiver_effects( is_sign, pvalues, qvalues = wald_test( params=params, fisher_inv=fim_inv, significance_threshold=significance_threshold ) - interaction_shape = self.model.training_model.inputs[1].shape + interaction_shape = np.int(self.n_features_0**2) # subset to interaction terms - is_sign = is_sign[self.n_features_0 : interaction_shape[-1] + self.n_features_0, :] - pvalues = pvalues[self.n_features_0 : interaction_shape[-1] + self.n_features_0, :] - qvalues = qvalues[self.n_features_0 : interaction_shape[-1] + self.n_features_0, :] + is_sign = is_sign[self.n_features_0 : interaction_shape + self.n_features_0, :] + pvalues = pvalues[self.n_features_0 : interaction_shape + self.n_features_0, :] + qvalues = qvalues[self.n_features_0 : interaction_shape + self.n_features_0, :] self.pvalues = np.concatenate( np.expand_dims(np.split(pvalues, indices_or_sections=np.sqrt(pvalues.shape[0]), axis=0), axis=0), @@ -1976,7 +1978,7 @@ def get_sender_receiver_effects( axis=0, ) - interaction_params = params[:, self.n_features_0 : interaction_shape[-1] + self.n_features_0] + interaction_params = params[:, self.n_features_0 : interaction_shape + self.n_features_0] self.fold_change = np.concatenate( np.expand_dims(np.split(interaction_params.T, indices_or_sections=np.sqrt(interaction_params.T.shape[0]), axis=0), axis=0), axis=0, diff --git a/ncem/utils/wald_test.py b/ncem/utils/wald_test.py index 0e76b2f4..8806e73c 100644 --- a/ncem/utils/wald_test.py +++ b/ncem/utils/wald_test.py @@ -4,7 +4,9 @@ def get_fim_inv(x: np.array, y: np.array): var = np.var(y, axis=0) fim = np.expand_dims(np.matmul(x.T, x), axis=0) / np.expand_dims(var, axis=[1, 2]) - + + fim = np.nan_to_num(fim) + fim_inv = np.array([ np.linalg.pinv(fim[i, :, :]) for i in range(fim.shape[0]) From 8c846eef5c8561f70b91d82481b5e39bcc791689 Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Wed, 18 May 2022 11:37:53 +0200 Subject: [PATCH 4/4] Bump version from 0.1.2 to 0.1.3 --- .cookietemple.yml | 2 +- .github/release-drafter.yml | 4 ++-- cookietemple.cfg | 2 +- docs/conf.py | 4 ++-- ncem/__init__.py | 2 +- pyproject.toml | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.cookietemple.yml b/.cookietemple.yml index ea400cb5..ed24439d 100644 --- a/.cookietemple.yml +++ b/.cookietemple.yml @@ -15,5 +15,5 @@ full_name: Anna Schaar email: anna.schaar@helmholtz-muenchen.de project_name: ncem project_short_description: ncem. Learning cell communication from spatial graphs of cells. -version: 0.1.2 +version: 0.1.3 license: BSD-3-Clause diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index 9bd97a98..0fdc078b 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -1,5 +1,5 @@ -name-template: "0.1.2 🌈" # <> -tag-template: 0.1.2 # <> +name-template: "0.1.3 🌈" # <> +tag-template: 0.1.3 # <> categories: - title: "🚀 Features" labels: diff --git a/cookietemple.cfg b/cookietemple.cfg index fcc70d14..8f854928 100644 --- a/cookietemple.cfg +++ b/cookietemple.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.2 +current_version = 0.1.3 [bumpversion_files_whitelisted] init_file = ncem/__init__.py diff --git a/docs/conf.py b/docs/conf.py index 14ccfaa1..79aa85b2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -73,9 +73,9 @@ # the built documents. # # The short X.Y version. -version = "0.1.2" +version = "0.1.3" # The full version, including alpha/beta/rc tags. -release = "0.1.2" +release = "0.1.3" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/ncem/__init__.py b/ncem/__init__.py index 4dddd161..6864fd2c 100644 --- a/ncem/__init__.py +++ b/ncem/__init__.py @@ -8,4 +8,4 @@ __maintainer__ = ", ".join(["Anna C. Schaar", "David S. Fischer"]) __author__ = ", ".join(["Anna C. Schaar", "David S. Fischer"]) __email__ = ", ".join(["anna.schaar@helmholtz-muenchen.de", "david.fischer@helmholtz-muenchen.de"]) -__version__ = "0.1.2" +__version__ = "0.1.3" diff --git a/pyproject.toml b/pyproject.toml index 6232706a..71738acd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "ncem" -version = "0.1.2" # <> +version = "0.1.3" # <> description = "ncem. Learning cell communication from spatial graphs of cells." authors = ["Anna C. Schaar "] license = "BSD"