From 5535813172d73b96772768a741c34a71a480b8e8 Mon Sep 17 00:00:00 2001
From: Saul Goodenough <littlebiao@outlook.com>
Date: Fri, 4 Oct 2024 10:11:22 +0800
Subject: [PATCH] Delete some files

---
 modify_packages.md                            |  78 -----
 run_came/Clustering_script_human.py           | 170 -----------
 run_came/Clustering_script_human_3d.py        | 193 ------------
 run_came/Clustering_script_human_3d_16.py     | 191 ------------
 run_came/Clustering_script_mouse.py           | 170 -----------
 run_came/brain_voxel_sample.py                | 268 ----------------
 run_came/brain_voxel_sample_human_mouse.py    | 273 -----------------
 .../brain_voxel_sample_human_mouse_16_11.py   | 273 -----------------
 run_came/brain_voxel_sample_mouse_human.py    | 273 -----------------
 .../brain_voxel_sample_mouse_human_16_11.py   | 273 -----------------
 ...l_sample_mouse_human_16_11_no_threshold.py | 273 -----------------
 ...n_voxel_sample_mouse_human_no_threshold.py | 273 -----------------
 ...ample_mouse_human_no_threshold_dilution.py | 288 ------------------
 ...ample_mouse_human_no_threshold_sagittal.py | 273 -----------------
 run_came/pca_script_human.py                  |  85 ------
 run_came/pca_script_mouse.py                  |  81 -----
 run_came/run_came_plot.py                     | 281 -----------------
 17 files changed, 3716 deletions(-)
 delete mode 100644 modify_packages.md
 delete mode 100644 run_came/Clustering_script_human.py
 delete mode 100644 run_came/Clustering_script_human_3d.py
 delete mode 100644 run_came/Clustering_script_human_3d_16.py
 delete mode 100644 run_came/Clustering_script_mouse.py
 delete mode 100644 run_came/brain_voxel_sample.py
 delete mode 100644 run_came/brain_voxel_sample_human_mouse.py
 delete mode 100644 run_came/brain_voxel_sample_human_mouse_16_11.py
 delete mode 100644 run_came/brain_voxel_sample_mouse_human.py
 delete mode 100644 run_came/brain_voxel_sample_mouse_human_16_11.py
 delete mode 100644 run_came/brain_voxel_sample_mouse_human_16_11_no_threshold.py
 delete mode 100644 run_came/brain_voxel_sample_mouse_human_no_threshold.py
 delete mode 100644 run_came/brain_voxel_sample_mouse_human_no_threshold_dilution.py
 delete mode 100644 run_came/brain_voxel_sample_mouse_human_no_threshold_sagittal.py
 delete mode 100644 run_came/pca_script_human.py
 delete mode 100644 run_came/pca_script_mouse.py
 delete mode 100644 run_came/run_came_plot.py

diff --git a/modify_packages.md b/modify_packages.md
deleted file mode 100644
index f3dd170..0000000
--- a/modify_packages.md
+++ /dev/null
@@ -1,78 +0,0 @@
-# Brain Alignment
-
-## Data Process
-
-## Run Commands
-
-## install packages
-```shell
-conda create -n brainalign python=3.8
-conda activate brainalign
-
-pip install "scanpy[leiden]"
-pip install torch  # >=1.8
-pip install dgl
-
-pip install yacs --upgrade
-
-pip install xgboost
-
-pip install scGeneFit
-
-pip install colorcet
-README.md
-pip install imblearn
-
-pip install gseapy
-```
-
-# Modification of the other packages
-## gseapy
-- Add fig to gseapy.dotplot's return, i.e., change
-```python
- if ofname is None:
-        return ax
-    dot.fig.savefig(ofname, bbox_inches="tight", dpi=300)
-```
-to:
-```python
- if ofname is None:
-        return ax
-    dot.fig.savefig(ofname, bbox_inches="tight", dpi=300)
-```
-- Change dotplot colorbar title pad, i.e., change
-```python
-cbar.ax.set_title(self.cbar_title, loc="left", fontweight="bold")
-```
-```python
-cbar.ax.set_title(self.cbar_title, loc="left", fontweight="bold", pad=20)
-```
-
-- delete '\n' in dotplot colorbar title
-```python
-ax.legend(
-            handles,
-            labels,
-            title="% Genes in set",
-            bbox_to_anchor=(1.02, 0.9),
-            loc="upper left",
-            frameon=False,
-            labelspacing=1.0,
-        )
-```
-
-## Scanpy
-### UMAP
-Add legend position in line 1109
-```python
-    if legend_loc == 'lower center':
-        for label in cats:
-            ax.scatter([], [], c=palette[label], label=label)
-        ax.legend(
-            frameon=False,
-            loc='lower center',
-            bbox_to_anchor=(0.45, -0.3),
-            ncol=(1 if len(cats) <= 14 else 2 if len(cats) <= 30 else 3),
-            fontsize=legend_fontsize,
-        )
-```
\ No newline at end of file
diff --git a/run_came/Clustering_script_human.py b/run_came/Clustering_script_human.py
deleted file mode 100644
index 350bafe..0000000
--- a/run_came/Clustering_script_human.py
+++ /dev/null
@@ -1,170 +0,0 @@
-
-
-import scanpy as sc, anndata as ad
-import numpy as np
-import pandas as pd
-from sklearn.cluster import KMeans
-from sklearn.metrics import adjusted_rand_score
-
-
-if __name__ == "__main__":
-
-    
-    path_rawdata_mouse = "./brain_human_mouse/human_brain_region_88_sparse.h5ad"
-    adata = sc.read_h5ad(path_rawdata_mouse)
-    
-    sc.settings.verbosity = 3
-    #sc.settings.set_figure_params(dpi=80)
-
-    
-    #sc.tl.leiden(adata, key_added = "leiden_1.0") # default resolution in 1.0
-    #sc.tl.leiden(adata, resolution = 0.6, key_added = "leiden_0.6")
-    #sc.tl.leiden(adata, resolution = 0.4, key_added = "leiden_0.4")
-    #sc.tl.leiden(adata, resolution = 1.4, key_added = "leiden_1.4")
-    
-    #sc.pl.umap(adata, color=['leiden_0.4', 'leiden_0.6', 'leiden_1.0','leiden_1.4'])
-    
-    #sc.tl.dendrogram(adata, groupby = "leiden_0.6")
-    #sc.pl.dendrogram(adata, groupby = "leiden_0.6")
-    
-    
-    #sc.pp.log1p(adata)
-    # store normalized counts in the raw slot, 
-    # we will subset adata.X for variable genes, but want to keep all genes matrix as well.
-    #adata.raw = adata
-    #adata.X =  pd.df(adata.X).fillna(0)
-    #sc.pp.highly_variable_genes(adata, min_mean=0.001, max_mean=3, min_disp=0.1)
-    #print("Highly variable genes: %d"%sum(adata.var.highly_variable))
-
-    #plot variable genes
-    #sc.pl.highly_variable_genes(adata)
-
-    # subset for variable genes in the dataset
-    #adata = adata[:, adata.var['highly_variable']]
-    
-    '''
-    sc.tl.leiden(adata, key_added = "leiden_1.0") # default resolution in 1.0
-    sc.tl.leiden(adata, resolution = 0.6, key_added = "leiden_0.6")
-    sc.tl.leiden(adata, resolution = 0.4, key_added = "leiden_0.4")
-    sc.tl.leiden(adata, resolution = 1.4, key_added = "leiden_1.4")
-    
-    sc.pl.umap(adata, color=['leiden_0.4', 'leiden_0.6', 'leiden_1.0','leiden_1.4'], return_fig=True).savefig('./figs/mouse_67/leiden.png')
-    sc.tl.dendrogram(adata, groupby = "leiden_0.6")
-    sc.pl.dendrogram(adata, groupby = "leiden_0.6", return_fig=True).savefig('./figs/mouse_67/leiden_groupby.png')
-    
-    
-    sc.tl.louvain(adata, key_added = "louvain_1.0") # default resolution in 1.0
-    sc.tl.louvain(adata, resolution = 0.6, key_added = "louvain_0.6")
-    sc.tl.louvain(adata, resolution = 0.4, key_added = "louvain_0.4")
-    sc.tl.louvain(adata, resolution = 1.4, key_added = "louvain_1.4")
-
-    sc.pl.umap(adata, color=['louvain_0.4', 'louvain_0.6', 'louvain_1.0','louvain_1.4'], return_fig=True).savefig('./figs/mouse_67/louvain.png')
-    
-    sc.tl.dendrogram(adata, groupby = "louvain_0.6")
-    sc.pl.dendrogram(adata, groupby = "louvain_0.6", return_fig=True).savefig('./figs/mouse_67/louvain_groupby.png')
-    '''
-    
-    	
-    
-    sc.tl.pca(adata, svd_solver='arpack')
-    sc.pl.pca(adata, color='region_name', components = ['1,2','3,4','5,6','7,8'], ncols=2, return_fig=True).savefig('./figs/human_88/pca.png')
-    
-    
-    
-    sc.pp.neighbors(adata, n_pcs = 30, n_neighbors = 20)
-    sc.tl.umap(adata)
-    
-    sc.pl.umap(adata, color=['region_name'], return_fig=True).savefig('./figs/human_88/umap.png')
-    
-    print(adata)
-   
-
-    # extract pca coordinates
-    X_pca = adata.obsm['X_pca'] 
-
-    # kmeans with k=5
-    kmeans = KMeans(n_clusters=5, random_state=0).fit(X_pca) 
-    adata.obs['kmeans5'] = kmeans.labels_.astype(str)
-
-    # kmeans with k=11
-    kmeans = KMeans(n_clusters=16, random_state=0).fit(X_pca) 
-    adata.obs['kmeans16'] = kmeans.labels_.astype(str)
-
-    # kmeans with k=20
-    kmeans = KMeans(n_clusters=30, random_state=0).fit(X_pca) 
-    adata.obs['kmeans30'] = kmeans.labels_.astype(str)
-    
-     # kmeans with k=67
-    kmeans = KMeans(n_clusters=88, random_state=0).fit(X_pca) 
-    adata.obs['kmeans88'] = kmeans.labels_.astype(str)
-
-    sc.pl.umap(adata, color=['kmeans5', 'kmeans16', 'kmeans30', 'kmeans88'], return_fig=True).savefig('./figs/human_88/k-means.png')
-    
-    
-    
-    
-    
-    path_rawdata_mouse = "./brain_human_mouse_16_11/human_brain_region_16_sparse.h5ad"
-    adata = sc.read_h5ad(path_rawdata_mouse)
-    
-    sc.settings.verbosity = 3
-    #sc.settings.set_figure_params(dpi=80)
-
-    
-    #sc.tl.leiden(adata, key_added = "leiden_1.0") # default resolution in 1.0
-    #sc.tl.leiden(adata, resolution = 0.6, key_added = "leiden_0.6")
-    #sc.tl.leiden(adata, resolution = 0.4, key_added = "leiden_0.4")
-    #sc.tl.leiden(adata, resolution = 1.4, key_added = "leiden_1.4")
-    
-    #sc.pl.umap(adata, color=['leiden_0.4', 'leiden_0.6', 'leiden_1.0','leiden_1.4'])
-    
-    #sc.tl.dendrogram(adata, groupby = "leiden_0.6")
-    #sc.pl.dendrogram(adata, groupby = "leiden_0.6")
-    
-    
-    #sc.pp.log1p(adata)
-    # store normalized counts in the raw slot, 
-    # we will subset adata.X for variable genes, but want to keep all genes matrix as well.
-    #adata.raw = adata
-    #adata.X =  pd.df(adata.X).fillna(0)
-    #sc.pp.highly_variable_genes(adata, min_mean=0.001, max_mean=3, min_disp=0.1)
-    #print("Highly variable genes: %d"%sum(adata.var.highly_variable))
-
-    #plot variable genes
-    #sc.pl.highly_variable_genes(adata)
-
-    # subset for variable genes in the dataset
-    #adata = adata[:, adata.var['highly_variable']]
-
-    sc.tl.pca(adata, svd_solver='arpack')
-    sc.pl.pca(adata, color='region_name', components = ['1,2','3,4','5,6','7,8'], ncols=2, return_fig=True).savefig('./figs/human_16/pca.png')
-    
-    
-    sc.pp.neighbors(adata, n_pcs = 30, n_neighbors = 20)
-    sc.tl.umap(adata)
-    
-    sc.pl.umap(adata, color=['region_name'], return_fig=True).savefig('./figs/human_16/umap.png')
-    
-    print(adata)
-   
-
-    # extract pca coordinates
-    X_pca = adata.obsm['X_pca'] 
-
-    # kmeans with k=5
-    kmeans = KMeans(n_clusters=5, random_state=0).fit(X_pca) 
-    adata.obs['kmeans5'] = kmeans.labels_.astype(str)
-
-    # kmeans with k=11
-    kmeans = KMeans(n_clusters=16, random_state=0).fit(X_pca) 
-    adata.obs['kmeans16'] = kmeans.labels_.astype(str)
-
-    # kmeans with k=20
-    kmeans = KMeans(n_clusters=30, random_state=0).fit(X_pca) 
-    adata.obs['kmeans30'] = kmeans.labels_.astype(str)
-    
-     # kmeans with k=67
-    #kmeans = KMeans(n_clusters=67, random_state=0).fit(X_pca) 
-    #adata.obs['kmeans67'] = kmeans.labels_.astype(str)
-
-    sc.pl.umap(adata, color=['kmeans5', 'kmeans16', 'kmeans30'], return_fig=True).savefig('./figs/human_16/k-means.png')    
diff --git a/run_came/Clustering_script_human_3d.py b/run_came/Clustering_script_human_3d.py
deleted file mode 100644
index 590aaaa..0000000
--- a/run_came/Clustering_script_human_3d.py
+++ /dev/null
@@ -1,193 +0,0 @@
-
-
-import scanpy as sc, anndata as ad
-import numpy as np
-import pandas as pd
-from sklearn.cluster import KMeans
-from sklearn.metrics import adjusted_rand_score
-import matplotlib.pyplot as plt
-
-import re, seaborn as sns
-
-from mpl_toolkits.mplot3d import Axes3D
-from matplotlib.colors import ListedColormap
-
-
-
-if __name__ == "__main__":
-
-    
-    path_rawdata_mouse = "./brain_human_mouse/human_brain_region_88_sparse_with3d.h5ad"
-    adata = sc.read_h5ad(path_rawdata_mouse)
-    
-    sc.settings.verbosity = 3
-    #sc.settings.set_figure_params(dpi=80)
-
-    
-    #sc.tl.leiden(adata, key_added = "leiden_1.0") # default resolution in 1.0
-    #sc.tl.leiden(adata, resolution = 0.6, key_added = "leiden_0.6")
-    #sc.tl.leiden(adata, resolution = 0.4, key_added = "leiden_0.4")
-    #sc.tl.leiden(adata, resolution = 1.4, key_added = "leiden_1.4")
-    
-    #sc.pl.umap(adata, color=['leiden_0.4', 'leiden_0.6', 'leiden_1.0','leiden_1.4'])
-    
-    #sc.tl.dendrogram(adata, groupby = "leiden_0.6")
-    #sc.pl.dendrogram(adata, groupby = "leiden_0.6")
-    
-    
-    #sc.pp.log1p(adata)
-    # store normalized counts in the raw slot, 
-    # we will subset adata.X for variable genes, but want to keep all genes matrix as well.
-    #adata.raw = adata
-    #adata.X =  pd.df(adata.X).fillna(0)
-    #sc.pp.highly_variable_genes(adata, min_mean=0.001, max_mean=3, min_disp=0.1)
-    #print("Highly variable genes: %d"%sum(adata.var.highly_variable))
-
-    #plot variable genes
-    #sc.pl.highly_variable_genes(adata)
-
-    # subset for variable genes in the dataset
-    #adata = adata[:, adata.var['highly_variable']]
-    
-    '''
-    sc.tl.leiden(adata, key_added = "leiden_1.0") # default resolution in 1.0
-    sc.tl.leiden(adata, resolution = 0.6, key_added = "leiden_0.6")
-    sc.tl.leiden(adata, resolution = 0.4, key_added = "leiden_0.4")
-    sc.tl.leiden(adata, resolution = 1.4, key_added = "leiden_1.4")
-    
-    sc.pl.umap(adata, color=['leiden_0.4', 'leiden_0.6', 'leiden_1.0','leiden_1.4'], return_fig=True).savefig('./figs/mouse_67/leiden.png')
-    sc.tl.dendrogram(adata, groupby = "leiden_0.6")
-    sc.pl.dendrogram(adata, groupby = "leiden_0.6", return_fig=True).savefig('./figs/mouse_67/leiden_groupby.png')
-    
-    
-    sc.tl.louvain(adata, key_added = "louvain_1.0") # default resolution in 1.0
-    sc.tl.louvain(adata, resolution = 0.6, key_added = "louvain_0.6")
-    sc.tl.louvain(adata, resolution = 0.4, key_added = "louvain_0.4")
-    sc.tl.louvain(adata, resolution = 1.4, key_added = "louvain_1.4")
-
-    sc.pl.umap(adata, color=['louvain_0.4', 'louvain_0.6', 'louvain_1.0','louvain_1.4'], return_fig=True).savefig('./figs/mouse_67/louvain.png')
-    
-    sc.tl.dendrogram(adata, groupby = "louvain_0.6")
-    sc.pl.dendrogram(adata, groupby = "louvain_0.6", return_fig=True).savefig('./figs/mouse_67/louvain_groupby.png')
-    '''
-    
-    	
-    
-    sc.tl.pca(adata, svd_solver='arpack')
-    sc.pl.pca(adata, color='region_name', components = ['1,2','3,4','5,6','7,8'], ncols=2, return_fig=True).savefig('./figs/human_88/pca.png')
-    
-    
-    
-    sc.pp.neighbors(adata, n_pcs = 30, n_neighbors = 20)
-    sc.tl.umap(adata)
-    
-    sc.pl.umap(adata, color=['region_name'], return_fig=True).savefig('./figs/human_88/umap.png')
-    
-    print(adata)
-   
-
-    # extract pca coordinates
-    X_pca = adata.obsm['X_pca'] 
-
-    # kmeans with k=5
-    kmeans = KMeans(n_clusters=5, random_state=0).fit(X_pca) 
-    adata.obs['kmeans5'] = kmeans.labels_.astype(str)
-
-    # kmeans with k=11
-    kmeans = KMeans(n_clusters=16, random_state=0).fit(X_pca) 
-    adata.obs['kmeans16'] = kmeans.labels_.astype(str)
-
-    # kmeans with k=20
-    kmeans = KMeans(n_clusters=30, random_state=0).fit(X_pca) 
-    adata.obs['kmeans30'] = kmeans.labels_.astype(str)
-    
-     # kmeans with k=67
-    kmeans = KMeans(n_clusters=88, random_state=0).fit(X_pca) 
-    adata.obs['kmeans88'] = kmeans.labels_.astype(str)
-
-    sc.pl.umap(adata, color=['kmeans5', 'kmeans16', 'kmeans30', 'kmeans88'], return_fig=True).savefig('./figs/human_88/k-means.png')
-    
-    print(adata)
-    #print(adata.obs['kmeans88'])
-    
-    # Create the figure
-    fig = plt.figure()
-    ax = fig.add_subplot(111, projection='3d')
-
-    # Generate the values
-    x_vals = adata.obs['mri_voxel_x'].values
-    y_vals = adata.obs['mri_voxel_y'].values
-    z_vals = adata.obs['mri_voxel_z'].values
-    color_region = adata.obs['region_name'].values
-    
-    unique_region_list = list(set(color_region))
-    color_list = []
-    for region in color_region:
-        color_list.append(unique_region_list.index(region))
-    # Plot the values
-    #ax.scatter(x_vals, y_vals, z_vals, c = 'b', marker='o')
-    #ax.set_xlabel('X-axis')
-    #ax.set_ylabel('Y-axis')
-    #ax.set_zlabel('Z-axis')
-
-    #plt.savefig('./figs/human_88/mri_xyy.png')
-    
-    
-
-    
-    # axes instance
-    fig = plt.figure(figsize=(10,10))
-    ax = Axes3D(fig, auto_add_to_figure=False)
-    fig.add_axes(ax)
-
-    # get colormap from seaborn
-    cmap = ListedColormap(sns.color_palette(n_colors=88, as_cmap=True))
-
-    # plot
-    sc = ax.scatter(x_vals, y_vals, z_vals, s=10, c=color_list, marker='o', cmap=cmap, alpha=1)
-    ax.set_xlabel('MRI X')
-    ax.set_ylabel('MRI Y')
-    ax.set_zlabel('MRI Z')
-
-    # legend
-    plt.legend(*sc.legend_elements(), bbox_to_anchor=(1.05, 1), loc=2)
-
-    # save
-    plt.savefig("./figs/human_88/scatter_xyz_region_name.png", bbox_inches='tight')
-    
-    
-    
-    
-    color_list_str = list(adata.obs['kmeans88'].values)
-    color_list = [int(x) for x in color_list_str]
-    
-    # axes instance
-    fig = plt.figure(figsize=(10,10))
-    ax = Axes3D(fig, auto_add_to_figure=False)
-    fig.add_axes(ax)
-
-    # get colormap from seaborn
-    cmap = ListedColormap(sns.color_palette(n_colors=88, as_cmap=True))
-
-    # plot
-    sc = ax.scatter(x_vals, y_vals, z_vals, s=10, c=color_list, marker='o', cmap=cmap, alpha=1)
-    ax.set_xlabel('MRI X')
-    ax.set_ylabel('MRI Y')
-    ax.set_zlabel('MRI Z')
-
-    # legend
-    plt.legend(*sc.legend_elements(), bbox_to_anchor=(1.05, 1), loc=2)
-
-    # save
-    plt.savefig("./figs/human_88/scatter_xyz_kmeans_88.png", bbox_inches='tight')
-    
-    
-    #sc.pl.scatter(adata, x=['mri_voxel_x'], y=['mri_voxel_y'], color=['region_name'], save='./figs/human_88/mri_xy_region.png')
-    #sc.pl.scatter(adata, x=['mri_voxel_x'], y=['mri_voxel_z'], color=['region_name'], save='./figs/human_88/mri_xz_region.png')
-    #sc.pl.scatter(adata, x=['mri_voxel_y'], y=['mri_voxel_z'], color=['region_name'], save='./figs/human_88/mri_yz_region.png')
-    
-    
-    
-    
-    
-    	
diff --git a/run_came/Clustering_script_human_3d_16.py b/run_came/Clustering_script_human_3d_16.py
deleted file mode 100644
index 65dde86..0000000
--- a/run_came/Clustering_script_human_3d_16.py
+++ /dev/null
@@ -1,191 +0,0 @@
-
-
-import scanpy as sc, anndata as ad
-import numpy as np
-import pandas as pd
-from sklearn.cluster import KMeans
-from sklearn.metrics import adjusted_rand_score
-import matplotlib.pyplot as plt
-
-import re, seaborn as sns
-
-from mpl_toolkits.mplot3d import Axes3D
-from matplotlib.colors import ListedColormap
-
-
-
-if __name__ == "__main__":
-
-    
-    path_rawdata_mouse = "./brain_human_mouse_16_11/human_brain_region_16_sparse_with3d.h5ad"
-    adata = sc.read_h5ad(path_rawdata_mouse)
-    
-    sc.settings.verbosity = 3
-    #sc.settings.set_figure_params(dpi=80)
-
-    
-    #sc.tl.leiden(adata, key_added = "leiden_1.0") # default resolution in 1.0
-    #sc.tl.leiden(adata, resolution = 0.6, key_added = "leiden_0.6")
-    #sc.tl.leiden(adata, resolution = 0.4, key_added = "leiden_0.4")
-    #sc.tl.leiden(adata, resolution = 1.4, key_added = "leiden_1.4")
-    
-    #sc.pl.umap(adata, color=['leiden_0.4', 'leiden_0.6', 'leiden_1.0','leiden_1.4'])
-    
-    #sc.tl.dendrogram(adata, groupby = "leiden_0.6")
-    #sc.pl.dendrogram(adata, groupby = "leiden_0.6")
-    
-    
-    #sc.pp.log1p(adata)
-    # store normalized counts in the raw slot, 
-    # we will subset adata.X for variable genes, but want to keep all genes matrix as well.
-    #adata.raw = adata
-    #adata.X =  pd.df(adata.X).fillna(0)
-    #sc.pp.highly_variable_genes(adata, min_mean=0.001, max_mean=3, min_disp=0.1)
-    #print("Highly variable genes: %d"%sum(adata.var.highly_variable))
-
-    #plot variable genes
-    #sc.pl.highly_variable_genes(adata)
-
-    # subset for variable genes in the dataset
-    #adata = adata[:, adata.var['highly_variable']]
-    
-    '''
-    sc.tl.leiden(adata, key_added = "leiden_1.0") # default resolution in 1.0
-    sc.tl.leiden(adata, resolution = 0.6, key_added = "leiden_0.6")
-    sc.tl.leiden(adata, resolution = 0.4, key_added = "leiden_0.4")
-    sc.tl.leiden(adata, resolution = 1.4, key_added = "leiden_1.4")
-    
-    sc.pl.umap(adata, color=['leiden_0.4', 'leiden_0.6', 'leiden_1.0','leiden_1.4'], return_fig=True).savefig('./figs/mouse_67/leiden.png')
-    sc.tl.dendrogram(adata, groupby = "leiden_0.6")
-    sc.pl.dendrogram(adata, groupby = "leiden_0.6", return_fig=True).savefig('./figs/mouse_67/leiden_groupby.png')
-    
-    
-    sc.tl.louvain(adata, key_added = "louvain_1.0") # default resolution in 1.0
-    sc.tl.louvain(adata, resolution = 0.6, key_added = "louvain_0.6")
-    sc.tl.louvain(adata, resolution = 0.4, key_added = "louvain_0.4")
-    sc.tl.louvain(adata, resolution = 1.4, key_added = "louvain_1.4")
-
-    sc.pl.umap(adata, color=['louvain_0.4', 'louvain_0.6', 'louvain_1.0','louvain_1.4'], return_fig=True).savefig('./figs/mouse_67/louvain.png')
-    
-    sc.tl.dendrogram(adata, groupby = "louvain_0.6")
-    sc.pl.dendrogram(adata, groupby = "louvain_0.6", return_fig=True).savefig('./figs/mouse_67/louvain_groupby.png')
-    '''
-    
-    	
-    
-    sc.tl.pca(adata, svd_solver='arpack')
-    sc.pl.pca(adata, color='region_name', components = ['1,2','3,4','5,6','7,8'], ncols=2, return_fig=True).savefig('./figs/human_16/pca.png')
-    
-    
-    
-    sc.pp.neighbors(adata, n_pcs = 30, n_neighbors = 20)
-    sc.tl.umap(adata)
-    
-    sc.pl.umap(adata, color=['region_name'], return_fig=True).savefig('./figs/human_16/umap.png')
-    
-    print(adata)
-   
-
-    # extract pca coordinates
-    X_pca = adata.obsm['X_pca'] 
-
-    # kmeans with k=5
-    kmeans = KMeans(n_clusters=5, random_state=0).fit(X_pca) 
-    adata.obs['kmeans5'] = kmeans.labels_.astype(str)
-
-    # kmeans with k=11
-    kmeans = KMeans(n_clusters=16, random_state=0).fit(X_pca) 
-    adata.obs['kmeans16'] = kmeans.labels_.astype(str)
-
-    # kmeans with k=20
-    kmeans = KMeans(n_clusters=30, random_state=0).fit(X_pca) 
-    adata.obs['kmeans30'] = kmeans.labels_.astype(str)
-    
-   
-
-    sc.pl.umap(adata, color=['kmeans5', 'kmeans16', 'kmeans30'], return_fig=True).savefig('./figs/human_16/k-means.png')
-    
-    print(adata)
-    #print(adata.obs['kmeans88'])
-    
-    # Create the figure
-    fig = plt.figure()
-    ax = fig.add_subplot(111, projection='3d')
-
-    # Generate the values
-    x_vals = adata.obs['mri_voxel_x'].values
-    y_vals = adata.obs['mri_voxel_y'].values
-    z_vals = adata.obs['mri_voxel_z'].values
-    color_region = adata.obs['region_name'].values
-    
-    unique_region_list = list(set(color_region))
-    color_list = []
-    for region in color_region:
-        color_list.append(unique_region_list.index(region))
-    # Plot the values
-    #ax.scatter(x_vals, y_vals, z_vals, c = 'b', marker='o')
-    #ax.set_xlabel('X-axis')
-    #ax.set_ylabel('Y-axis')
-    #ax.set_zlabel('Z-axis')
-
-    #plt.savefig('./figs/human_88/mri_xyy.png')
-    
-    
-
-    
-    # axes instance
-    fig = plt.figure(figsize=(10,10))
-    ax = Axes3D(fig, auto_add_to_figure=False)
-    fig.add_axes(ax)
-
-    # get colormap from seaborn
-    cmap = ListedColormap(sns.color_palette(n_colors=16, as_cmap=True))
-
-    # plot
-    sc = ax.scatter(x_vals, y_vals, z_vals, s=10, c=color_list, marker='o', cmap=cmap, alpha=1)
-    ax.set_xlabel('MRI X')
-    ax.set_ylabel('MRI Y')
-    ax.set_zlabel('MRI Z')
-
-    # legend
-    plt.legend(*sc.legend_elements(), bbox_to_anchor=(1.05, 1), loc=2)
-
-    # save
-    plt.savefig("./figs/human_16/scatter_xyz_region_name.png", bbox_inches='tight')
-    
-    
-    
-    
-    color_list_str = list(adata.obs['kmeans16'].values)
-    color_list = [int(x) for x in color_list_str]
-    
-    # axes instance
-    fig = plt.figure(figsize=(10,10))
-    ax = Axes3D(fig, auto_add_to_figure=False)
-    fig.add_axes(ax)
-
-    # get colormap from seaborn
-    cmap = ListedColormap(sns.color_palette(n_colors=16, as_cmap=True))
-
-    # plot
-    sc = ax.scatter(x_vals, y_vals, z_vals, s=10, c=color_list, marker='o', cmap=cmap, alpha=1)
-    ax.set_xlabel('MRI X')
-    ax.set_ylabel('MRI Y')
-    ax.set_zlabel('MRI Z')
-
-    # legend
-    plt.legend(*sc.legend_elements(), bbox_to_anchor=(1.05, 1), loc=2)
-
-    # save
-    plt.savefig("./figs/human_16/scatter_xyz_kmeans_16.png", bbox_inches='tight')
-    
-    
-    #sc.pl.scatter(adata, x=['mri_voxel_x'], y=['mri_voxel_y'], color=['region_name'], save='./figs/human_88/mri_xy_region.png')
-    #sc.pl.scatter(adata, x=['mri_voxel_x'], y=['mri_voxel_z'], color=['region_name'], save='./figs/human_88/mri_xz_region.png')
-    #sc.pl.scatter(adata, x=['mri_voxel_y'], y=['mri_voxel_z'], color=['region_name'], save='./figs/human_88/mri_yz_region.png')
-    
-    
-    
-    
-    
-    	
diff --git a/run_came/Clustering_script_mouse.py b/run_came/Clustering_script_mouse.py
deleted file mode 100644
index dffb29f..0000000
--- a/run_came/Clustering_script_mouse.py
+++ /dev/null
@@ -1,170 +0,0 @@
-
-
-import scanpy as sc, anndata as ad
-import numpy as np
-import pandas as pd
-from sklearn.cluster import KMeans
-from sklearn.metrics import adjusted_rand_score
-
-
-if __name__ == "__main__":
-
-    
-    path_rawdata_mouse = "./brain_human_mouse/mouse_brain_region_67_sparse.h5ad"
-    adata = sc.read_h5ad(path_rawdata_mouse)
-    
-    sc.settings.verbosity = 3
-    #sc.settings.set_figure_params(dpi=80)
-
-    
-    #sc.tl.leiden(adata, key_added = "leiden_1.0") # default resolution in 1.0
-    #sc.tl.leiden(adata, resolution = 0.6, key_added = "leiden_0.6")
-    #sc.tl.leiden(adata, resolution = 0.4, key_added = "leiden_0.4")
-    #sc.tl.leiden(adata, resolution = 1.4, key_added = "leiden_1.4")
-    
-    #sc.pl.umap(adata, color=['leiden_0.4', 'leiden_0.6', 'leiden_1.0','leiden_1.4'])
-    
-    #sc.tl.dendrogram(adata, groupby = "leiden_0.6")
-    #sc.pl.dendrogram(adata, groupby = "leiden_0.6")
-    
-    
-    #sc.pp.log1p(adata)
-    # store normalized counts in the raw slot, 
-    # we will subset adata.X for variable genes, but want to keep all genes matrix as well.
-    #adata.raw = adata
-    #adata.X =  pd.df(adata.X).fillna(0)
-    #sc.pp.highly_variable_genes(adata, min_mean=0.001, max_mean=3, min_disp=0.1)
-    #print("Highly variable genes: %d"%sum(adata.var.highly_variable))
-
-    #plot variable genes
-    #sc.pl.highly_variable_genes(adata)
-
-    # subset for variable genes in the dataset
-    #adata = adata[:, adata.var['highly_variable']]
-    
-    '''
-    sc.tl.leiden(adata, key_added = "leiden_1.0") # default resolution in 1.0
-    sc.tl.leiden(adata, resolution = 0.6, key_added = "leiden_0.6")
-    sc.tl.leiden(adata, resolution = 0.4, key_added = "leiden_0.4")
-    sc.tl.leiden(adata, resolution = 1.4, key_added = "leiden_1.4")
-    
-    sc.pl.umap(adata, color=['leiden_0.4', 'leiden_0.6', 'leiden_1.0','leiden_1.4'], return_fig=True).savefig('./figs/mouse_67/leiden.png')
-    sc.tl.dendrogram(adata, groupby = "leiden_0.6")
-    sc.pl.dendrogram(adata, groupby = "leiden_0.6", return_fig=True).savefig('./figs/mouse_67/leiden_groupby.png')
-    
-    
-    sc.tl.louvain(adata, key_added = "louvain_1.0") # default resolution in 1.0
-    sc.tl.louvain(adata, resolution = 0.6, key_added = "louvain_0.6")
-    sc.tl.louvain(adata, resolution = 0.4, key_added = "louvain_0.4")
-    sc.tl.louvain(adata, resolution = 1.4, key_added = "louvain_1.4")
-
-    sc.pl.umap(adata, color=['louvain_0.4', 'louvain_0.6', 'louvain_1.0','louvain_1.4'], return_fig=True).savefig('./figs/mouse_67/louvain.png')
-    
-    sc.tl.dendrogram(adata, groupby = "louvain_0.6")
-    sc.pl.dendrogram(adata, groupby = "louvain_0.6", return_fig=True).savefig('./figs/mouse_67/louvain_groupby.png')
-    '''
-    
-    	
-    
-    sc.tl.pca(adata, svd_solver='arpack')
-    sc.pl.pca(adata, color='region_name', components = ['1,2','3,4','5,6','7,8'], ncols=2, return_fig=True).savefig('./figs/mouse_67/pca.png')
-    
-    
-    
-    sc.pp.neighbors(adata, n_pcs = 30, n_neighbors = 20)
-    sc.tl.umap(adata)
-    
-    sc.pl.umap(adata, color=['region_name'], return_fig=True).savefig('./figs/mouse_67/umap.png')
-    
-    print(adata)
-   
-
-    # extract pca coordinates
-    X_pca = adata.obsm['X_pca'] 
-
-    # kmeans with k=5
-    kmeans = KMeans(n_clusters=5, random_state=0).fit(X_pca) 
-    adata.obs['kmeans5'] = kmeans.labels_.astype(str)
-
-    # kmeans with k=11
-    kmeans = KMeans(n_clusters=11, random_state=0).fit(X_pca) 
-    adata.obs['kmeans11'] = kmeans.labels_.astype(str)
-
-    # kmeans with k=20
-    kmeans = KMeans(n_clusters=20, random_state=0).fit(X_pca) 
-    adata.obs['kmeans20'] = kmeans.labels_.astype(str)
-    
-     # kmeans with k=67
-    kmeans = KMeans(n_clusters=67, random_state=0).fit(X_pca) 
-    adata.obs['kmeans67'] = kmeans.labels_.astype(str)
-
-    sc.pl.umap(adata, color=['kmeans5', 'kmeans11', 'kmeans20', 'kmeans67'], return_fig=True).savefig('./figs/mouse_67/k-means.png')
-    
-    
-    
-    
-    
-    path_rawdata_mouse = "./brain_human_mouse_16_11/mouse_brain_region_11_sparse.h5ad"
-    adata = sc.read_h5ad(path_rawdata_mouse)
-    
-    sc.settings.verbosity = 3
-    #sc.settings.set_figure_params(dpi=80)
-
-    
-    #sc.tl.leiden(adata, key_added = "leiden_1.0") # default resolution in 1.0
-    #sc.tl.leiden(adata, resolution = 0.6, key_added = "leiden_0.6")
-    #sc.tl.leiden(adata, resolution = 0.4, key_added = "leiden_0.4")
-    #sc.tl.leiden(adata, resolution = 1.4, key_added = "leiden_1.4")
-    
-    #sc.pl.umap(adata, color=['leiden_0.4', 'leiden_0.6', 'leiden_1.0','leiden_1.4'])
-    
-    #sc.tl.dendrogram(adata, groupby = "leiden_0.6")
-    #sc.pl.dendrogram(adata, groupby = "leiden_0.6")
-    
-    
-    #sc.pp.log1p(adata)
-    # store normalized counts in the raw slot, 
-    # we will subset adata.X for variable genes, but want to keep all genes matrix as well.
-    #adata.raw = adata
-    #adata.X =  pd.df(adata.X).fillna(0)
-    #sc.pp.highly_variable_genes(adata, min_mean=0.001, max_mean=3, min_disp=0.1)
-    #print("Highly variable genes: %d"%sum(adata.var.highly_variable))
-
-    #plot variable genes
-    #sc.pl.highly_variable_genes(adata)
-
-    # subset for variable genes in the dataset
-    #adata = adata[:, adata.var['highly_variable']]
-
-    sc.tl.pca(adata, svd_solver='arpack')
-    sc.pl.pca(adata, color='region_name', components = ['1,2','3,4','5,6','7,8'], ncols=2, return_fig=True).savefig('./figs/mouse_11/pca.png')
-    
-    
-    sc.pp.neighbors(adata, n_pcs = 30, n_neighbors = 20)
-    sc.tl.umap(adata)
-    
-    sc.pl.umap(adata, color=['region_name'], return_fig=True).savefig('./figs/mouse_11/umap.png')
-    
-    print(adata)
-   
-
-    # extract pca coordinates
-    X_pca = adata.obsm['X_pca'] 
-
-    # kmeans with k=5
-    kmeans = KMeans(n_clusters=5, random_state=0).fit(X_pca) 
-    adata.obs['kmeans5'] = kmeans.labels_.astype(str)
-
-    # kmeans with k=11
-    kmeans = KMeans(n_clusters=11, random_state=0).fit(X_pca) 
-    adata.obs['kmeans11'] = kmeans.labels_.astype(str)
-
-    # kmeans with k=20
-    kmeans = KMeans(n_clusters=20, random_state=0).fit(X_pca) 
-    adata.obs['kmeans20'] = kmeans.labels_.astype(str)
-    
-     # kmeans with k=67
-    #kmeans = KMeans(n_clusters=67, random_state=0).fit(X_pca) 
-    #adata.obs['kmeans67'] = kmeans.labels_.astype(str)
-
-    sc.pl.umap(adata, color=['kmeans5', 'kmeans11', 'kmeans20'], return_fig=True).savefig('./figs/mouse_11/k-means.png')    
diff --git a/run_came/brain_voxel_sample.py b/run_came/brain_voxel_sample.py
deleted file mode 100644
index 2511482..0000000
--- a/run_came/brain_voxel_sample.py
+++ /dev/null
@@ -1,268 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Wed May  5 20:08:52 2021
-
-@author: Xingyan Liu
-"""
-import came
-from came import pipeline, pp, pl
-
-import logging
-import os
-import sys
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-from matplotlib import pyplot as plt  # optional
-import seaborn as sns  # optional
-
-import scanpy as sc
-from scipy import sparse
-
-import networkx as nx
-import torch
-
-
-try:
-    import matplotlib as mpl
-    mpl.use('agg')
-except Exception as e:
-    print(f"An error occurred when setting matplotlib backend ({e})")
-
-if __name__ == '__main__':
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s %(filename)s-%(lineno)d-%(funcName)s(): '
-               '%(levelname)s\n %(message)s')
-
-    #came.__test1__(10, batch_size=None, reverse=False)
-    #came.__test2__(10, batch_size=2048)
-    dsnames = ('Baron_human', 'Baron_mouse')  # the dataset names, set by user
-    dsn1, dsn2 = dsnames
-
-    path_rawdata1 = './brain_human_mouse/human_brain_region_88_imputed.h5ad'
-    path_rawdata2 = './brain_human_mouse/mouse_brain_region_67.h5ad'
-    
-    path_varmap = './came/sample_data/gene_matches_human2mouse.csv'
-    path_varmap_1v1 = './came/sample_data/gene_matches_1v1_human2mouse.csv'
-    
-    # ========= load data =========
-    df_varmap = pd.read_csv(path_varmap)
-    df_varmap_1v1 = pd.read_csv(path_varmap_1v1) if path_varmap_1v1 else came.pp.take_1v1_matches(df_varmap)
-
-    adata_raw1 = sc.read_h5ad(path_rawdata1)
-    adata_raw2 = sc.read_h5ad(path_rawdata2)
-    adatas = [adata_raw1, adata_raw2]
-    
-    key_class1 = 'region_name'  # set by user
-    key_class2 = 'region_name'  # set by user
-    
-    ROOT = '/home1/zhangbiao/CAME/brain_human_mouse/'
-    time_tag = came.make_nowtime_tag()
-    resdir = ROOT +  f'{dsnames}-{time_tag}'  # set by user
-    figdir = resdir + '/figs'
-    came.check_dirs(figdir)  # check and make the directory
-    
-    sc.pp.filter_genes(adata_raw1, min_cells=3)
-    sc.pp.filter_genes(adata_raw2, min_cells=3)
-    
-    # Inspect classes
-    if key_class2 is not None:
-        group_counts_ori = pd.concat([
-            pd.value_counts(adata_raw1.obs[key_class1]),
-            pd.value_counts(adata_raw2.obs[key_class2]),
-        ], axis=1, keys=dsnames)
-    else:
-        group_counts_ori = pd.value_counts(adata_raw1.obs[key_class1])
-        
-    print(group_counts_ori)
-        
-        
-    # The numer of training epochs
-    # (a recommended setting is 200-400 for whole-graph training, and 80-200 for sub-graph training)
-    n_epochs = 300
-
-    # The training batch size
-    # When the GPU memory is limited, set 4096 or more if possible.
-    batch_size = 4096
-
-    # The number of epochs to skip for checkpoint backup
-    n_pass = 100
-
-    # Whether to use the single-cell networks
-    use_scnets = True
-
-    # The number of top DEGs to take as the node-features of each cells.
-    # You set it 70-100 for distant species pairs.
-    ntop_deg = 50
-
-    # The number of top DEGs to take as the graph nodes, which can be directly displayed on the UMAP plot.
-    ntop_deg_nodes = 50
-    # The source of the node genes; use both DEGs and HVGs by default
-    node_source = 'deg,hvg'
-
-    # Whether to take into account the non-1v1 variables as the node features.
-    keep_non1v1_feats = True
-    
-    came_inputs, (adata1, adata2) = pipeline.preprocess_unaligned(
-        adatas,
-        key_class=key_class1,
-        use_scnets=use_scnets,
-        ntop_deg=ntop_deg,
-        ntop_deg_nodes=ntop_deg_nodes,
-        node_source=node_source,
-        )
-
-    outputs = pipeline.main_for_unaligned(
-        **came_inputs,
-        df_varmap=df_varmap,
-        df_varmap_1v1=df_varmap_1v1,
-        dataset_names=dsnames,
-        key_class1=key_class1,
-        key_class2=key_class2,
-        do_normalize=True,
-        keep_non1v1_feats=keep_non1v1_feats,
-        n_epochs=n_epochs,
-        resdir=resdir,
-        n_pass=n_pass,
-        batch_size=batch_size,
-        plot_results=True,
-        )
-        
-    dpair = outputs['dpair']
-    trainer = outputs['trainer']
-    h_dict = outputs['h_dict']
-    out_cell = outputs['out_cell']
-    predictor = outputs['predictor']
-
-
-    obs_ids1, obs_ids2 = dpair.obs_ids1, dpair.obs_ids2
-    obs = dpair.obs
-    classes = predictor.classes
-    
-    # plot figures
-    y_true = obs['celltype'][obs_ids2].values
-    y_pred = obs['predicted'][obs_ids2].values
-
-    ax, contmat = pl.plot_contingency_mat(y_true, y_pred, norm_axis=1, order_rows=False, order_cols=False,)
-    pl._save_with_adjust(ax.figure, figdir / 'contingency_mat.png')
-    ax.figure
-    
-    name_label = 'celltype'
-    cols_anno = ['celltype', 'predicted'][:]
-    df_probs = obs[list(classes)]
-
-
-    gs = pl.wrapper_heatmap_scores(df_probs.iloc[obs_ids2], obs.iloc[obs_ids2], ignore_index=True,
-    col_label='celltype', col_pred='predicted',
-    n_subsample=50,  # sampling 50 cells for each group
-    cmap_heat='magma_r',
-    fp=figdir / f'heatmap_probas.pdf')
-
-    gs.figure
-    
-    
-    
-    # further analysis
-    hidden_list = came.load_hidden_states(resdir / 'hidden_list.h5')
-    #hidden_list  # a list of dicts
-    h_dict = hidden_list[-1]
-    # the last layer of hidden states
-    
-    adt = pp.make_adata(h_dict['cell'], obs=dpair.obs, assparse=False, ignore_index=True)
-    gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    
-    # UMAP of cell embeddings
-    sc.set_figure_params(dpi_save=200)
-
-    sc.pp.neighbors(adt, n_neighbors=15, metric='cosine', use_rep='X')
-    sc.tl.umap(adt)
-    # sc.pl.umap(adt, color=['dataset', 'celltype'], ncols=1)
-
-    ftype = ['.svg', ''][1]
-    sc.pl.umap(adt, color='dataset', save=f'-dataset{ftype}')
-    sc.pl.umap(adt, color='celltype', save=f'-ctype{ftype}')
-    
-    obs_umap = adt.obsm['X_umap']
-    obs['UMAP1'] = obs_umap[:, 0]
-    obs['UMAP2'] = obs_umap[:, 1]
-    obs.to_csv(resdir / 'obs.csv')
-    adt.write(resdir / 'adt_hidden_cell.h5ad')
-    
-    adata1.obsm['X_umap'] = obs_umap[obs_ids1]
-    adata2.obsm['X_umap'] = obs_umap[obs_ids2]
-    
-    # Umap of genes
-    sc.pp.neighbors(gadt, n_neighbors=15, metric='cosine', use_rep='X')
-
-    # gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    sc.tl.umap(gadt)
-    sc.pl.umap(gadt, color='dataset', )
-    
-    # joint gene module extraction
-    sc.tl.leiden(gadt, resolution=.8, key_added='module')
-    sc.pl.umap(gadt, color='module', ncols=1, palette='tab20b')
-    
-    # gadt.obs_names = gadt.obs_names.astype(str)
-    gadt1, gadt2 = pp.bisplit_adata(gadt, 'dataset', dsnames[0], reset_index_by='name')
-
-    color_by = 'module'
-    palette = 'tab20b'
-    sc.pl.umap(gadt1, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-    sc.pl.umap(gadt2, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-           
-    f_var_links = came.weight_linked_vars(
-    gadt.X, dpair._vv_adj, names=dpair.get_vnode_names(),
-    matric='cosine', index_names=dsnames,)
-
-    gadt1.write(resdir / 'adt_hidden_gene1.h5ad')
-    gadt2.write(resdir / 'adt_hidden_gene2.h5ad')
-    
-    # Gene-expression-profiles (for each cell type) on gene UMAP
-    # averaged expressions
-    avg_expr1 = pp.group_mean_adata(adatas[0], groupby=key_class1,features=dpair.vnode_names1, use_raw=True)
-    avg_expr2 = pp.group_mean_adata(adatas[1], groupby=key_class2 if key_class2 else 'predicted',features=dpair.vnode_names2, use_raw=True)
-    
-    plkwds = dict(cmap='RdYlBu_r', vmax=2.5, vmin=-1.5, do_zscore=True,
-              axscale=3, ncols=5, with_cbar=True)
-
-    fig1, axs1 = pl.adata_embed_with_values(gadt1, avg_expr1, fp=figdir / f'umap_exprAvgs-{dsnames[0]}-all.png', **plkwds)
-    fig2, axs2 = pl.adata_embed_with_values(gadt2, avg_expr2, fp=figdir / f'umap_exprAvgs-{dsnames[1]}-all.png', **plkwds)
-    
-    # Abstracted graph
-    norm_ov = ['max', 'zs', None][1]
-    cut_ov = 0.
-
-    groupby_var = 'module'
-    obs_labels1, obs_labels2 = adt.obs['celltype'][dpair.obs_ids1], \
-                           adt.obs['celltype'][dpair.obs_ids2]
-    var_labels1, var_labels2 = gadt1.obs[groupby_var], gadt2.obs[groupby_var]
-
-    sp1, sp2 = 'human', 'mouse'
-    g = came.make_abstracted_graph(
-        obs_labels1, obs_labels2,
-        var_labels1, var_labels2,
-        avg_expr1, avg_expr2,
-        df_var_links,
-        tags_obs=(f'{sp1} ', f'{sp2} '),
-        tags_var=(f'{sp1} module ', f'{sp2} module '),
-        cut_ov=cut_ov,
-        norm_mtd_ov=norm_ov,)
-        
-    #visualization
-    fp_abs = figdir / f'abstracted_graph-{groupby_var}-cut{cut_ov}-{norm_ov}.pdf'
-    ax = pl.plot_multipartite_graph(g, edge_scale=10, figsize=(9, 7.5), alpha=0.5, fp=fp_abs)  # nodelist=nodelist,
-
-    ax.figure
-    
-    came.save_pickle(g, resdir / 'abs_graph.pickle')
-    
-    
-    
-    
-
diff --git a/run_came/brain_voxel_sample_human_mouse.py b/run_came/brain_voxel_sample_human_mouse.py
deleted file mode 100644
index c272571..0000000
--- a/run_came/brain_voxel_sample_human_mouse.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Wed May  5 20:08:52 2021
-
-@author: Xingyan Liu
-"""
-import came
-from came import pipeline, pp, pl
-
-import logging
-import os
-import sys
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-from matplotlib import pyplot as plt  # optional
-import seaborn as sns  # optional
-
-import scanpy as sc
-from scipy import sparse
-
-import networkx as nx
-import torch
-
-
-try:
-    import matplotlib as mpl
-    mpl.use('agg')
-except Exception as e:
-    print(f"An error occurred when setting matplotlib backend ({e})")
-
-if __name__ == '__main__':
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s %(filename)s-%(lineno)d-%(funcName)s(): '
-               '%(levelname)s\n %(message)s')
-
-    #came.__test1__(10, batch_size=None, reverse=False)
-    #came.__test2__(10, batch_size=2048)
-    dsnames = ('Baron_human', 'Baron_mouse')  # the dataset names, set by user
-    dsn1, dsn2 = dsnames
-
-    path_rawdata1 = './brain_human_mouse/human_brain_region_88_sparse.h5ad'
-    path_rawdata2 = './brain_human_mouse/mouse_brain_region_67_sparse.h5ad'
-    
-    path_varmap = './came/sample_data/gene_matches_human2mouse.csv'
-    path_varmap_1v1 = './came/sample_data/gene_matches_1v1_human2mouse.csv'
-    
-    # ========= load data =========
-    df_varmap = pd.read_csv(path_varmap)
-    df_varmap_1v1 = pd.read_csv(path_varmap_1v1) if path_varmap_1v1 else came.pp.take_1v1_matches(df_varmap)
-
-    adata_raw1 = sc.read_h5ad(path_rawdata1)
-    adata_raw2 = sc.read_h5ad(path_rawdata2)
-    adatas = [adata_raw1, adata_raw2]
-    
-    key_class1 = 'region_name'  # set by user
-    key_class2 = 'region_name'  # set by user
-    
-    ROOT = '/home1/zhangbiao/CAME/brain_human_mouse/'
-    time_tag = came.make_nowtime_tag()
-    resdir = ROOT +  f'{dsnames}-{time_tag}/'  # set by user
-    figdir = resdir + '/figs/'
-    came.check_dirs(figdir)  # check and make the directory
-    
-    sc.pp.filter_genes(adata_raw1, min_cells=3)
-    sc.pp.filter_genes(adata_raw2, min_cells=3)
-    
-    # Inspect classes
-    if key_class2 is not None:
-        group_counts_ori = pd.concat([
-            pd.value_counts(adata_raw1.obs[key_class1]),
-            pd.value_counts(adata_raw2.obs[key_class2]),
-        ], axis=1, keys=dsnames)
-    else:
-        group_counts_ori = pd.value_counts(adata_raw1.obs[key_class1])
-        
-    print(group_counts_ori)
-        
-        
-    # The numer of training epochs
-    # (a recommended setting is 200-400 for whole-graph training, and 80-200 for sub-graph training)
-    n_epochs = 300
-
-    # The training batch size
-    # When the GPU memory is limited, set 4096 or more if possible.
-    batch_size = 2048
-
-    # The number of epochs to skip for checkpoint backup
-    n_pass = 100
-
-    # Whether to use the single-cell networks
-    use_scnets = True
-
-    # The number of top DEGs to take as the node-features of each cells.
-    # You set it 70-100 for distant species pairs.
-    ntop_deg = 50
-
-    # The number of top DEGs to take as the graph nodes, which can be directly displayed on the UMAP plot.
-    ntop_deg_nodes = 50
-    # The source of the node genes; use both DEGs and HVGs by default
-    node_source = 'deg,hvg'
-
-    # Whether to take into account the non-1v1 variables as the node features.
-    keep_non1v1_feats = True
-    
-    came_inputs, (adata1, adata2) = pipeline.preprocess_unaligned(
-        adatas,
-        key_class=key_class1,
-        use_scnets=use_scnets,
-        ntop_deg=ntop_deg,
-        ntop_deg_nodes=ntop_deg_nodes,
-        node_source=node_source,
-        )
-
-    outputs = pipeline.main_for_unaligned(
-        **came_inputs,
-        df_varmap=df_varmap,
-        df_varmap_1v1=df_varmap_1v1,
-        dataset_names=dsnames,
-        key_class1=key_class1,
-        key_class2=key_class2,
-        do_normalize=True,
-        keep_non1v1_feats=keep_non1v1_feats,
-        n_epochs=n_epochs,
-        resdir=resdir,
-        n_pass=n_pass,
-        batch_size=batch_size,
-        plot_results=True,
-        device_id = '1',
-        )
-        
-    dpair = outputs['dpair']
-    trainer = outputs['trainer']
-    h_dict = outputs['h_dict']
-    out_cell = outputs['out_cell']
-    predictor = outputs['predictor']
-
-
-    obs_ids1, obs_ids2 = dpair.obs_ids1, dpair.obs_ids2
-    obs = dpair.obs
-    classes = predictor.classes
-    
-    # plot figures
-    y_true = obs['celltype'][obs_ids2].values
-    y_pred = obs['predicted'][obs_ids2].values
-    print(y_true.shape)
-    print(y_pred.shape)
-    print(type(y_true[0]))
-    print(type(y_pred[0]))
-
-    #ax, contmat = pl.plot_contingency_mat(y_true, y_pred, norm_axis=1, order_rows=False, order_cols=False,)
-    #pl._save_with_adjust(ax.figure, figdir / 'contingency_mat.png')
-    #ax.figure
-    
-    name_label = 'celltype'
-    cols_anno = ['celltype', 'predicted'][:]
-    df_probs = obs[list(classes)]
-
-
-    gs = pl.wrapper_heatmap_scores(df_probs.iloc[obs_ids2], obs.iloc[obs_ids2], ignore_index=True,
-    col_label='celltype', col_pred='predicted',
-    n_subsample=50,  # sampling 50 cells for each group
-    cmap_heat='magma_r',
-    fp=figdir + 'heatmap_probas.pdf')
-
-    gs.figure
-    
-    
-    
-    # further analysis
-    hidden_list = came.load_hidden_states(resdir + 'hidden_list.h5')
-    #hidden_list  # a list of dicts
-    h_dict = hidden_list[-1]
-    # the last layer of hidden states
-    
-    adt = pp.make_adata(h_dict['cell'], obs=dpair.obs, assparse=False, ignore_index=True)
-    gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    
-    # UMAP of cell embeddings
-    sc.set_figure_params(dpi_save=200)
-
-    sc.pp.neighbors(adt, n_neighbors=15, metric='cosine', use_rep='X')
-    sc.tl.umap(adt)
-    # sc.pl.umap(adt, color=['dataset', 'celltype'], ncols=1)
-
-    ftype = ['.svg', ''][1]
-    sc.pl.umap(adt, color='dataset', save=f'-dataset{ftype}')
-    sc.pl.umap(adt, color='celltype', save=f'-ctype{ftype}')
-    
-    obs_umap = adt.obsm['X_umap']
-    obs['UMAP1'] = obs_umap[:, 0]
-    obs['UMAP2'] = obs_umap[:, 1]
-    obs.to_csv(resdir + 'obs.csv')
-    adt.write(resdir + 'adt_hidden_cell.h5ad')
-    
-    adata1.obsm['X_umap'] = obs_umap[obs_ids1]
-    adata2.obsm['X_umap'] = obs_umap[obs_ids2]
-    
-    # Umap of genes
-    sc.pp.neighbors(gadt, n_neighbors=15, metric='cosine', use_rep='X')
-
-    # gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    sc.tl.umap(gadt)
-    sc.pl.umap(gadt, color='dataset', )
-    
-    # joint gene module extraction
-    sc.tl.leiden(gadt, resolution=.8, key_added='module')
-    sc.pl.umap(gadt, color='module', ncols=1, palette='tab20b')
-    
-    # gadt.obs_names = gadt.obs_names.astype(str)
-    gadt1, gadt2 = pp.bisplit_adata(gadt, 'dataset', dsnames[0], reset_index_by='name')
-
-    color_by = 'module'
-    palette = 'tab20b'
-    sc.pl.umap(gadt1, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-    sc.pl.umap(gadt2, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-           
-    df_var_links = came.weight_linked_vars(
-    gadt.X, dpair._vv_adj, names=dpair.get_vnode_names(),
-    matric='cosine', index_names=dsnames,)
-
-    gadt1.write(resdir + 'adt_hidden_gene1.h5ad')
-    gadt2.write(resdir + 'adt_hidden_gene2.h5ad')
-    
-    # Gene-expression-profiles (for each cell type) on gene UMAP
-    # averaged expressions
-    avg_expr1 = pp.group_mean_adata(adatas[0], groupby=key_class1,features=dpair.vnode_names1, use_raw=True)
-    avg_expr2 = pp.group_mean_adata(adatas[1], groupby=key_class2 if key_class2 else 'predicted',features=dpair.vnode_names2, use_raw=True)
-    
-    plkwds = dict(cmap='RdYlBu_r', vmax=2.5, vmin=-1.5, do_zscore=True,
-              axscale=3, ncols=5, with_cbar=True)
-
-    fig1, axs1 = pl.adata_embed_with_values(gadt1, avg_expr1, fp=figdir + f'umap_exprAvgs-{dsnames[0]}-all.png', **plkwds)
-    fig2, axs2 = pl.adata_embed_with_values(gadt2, avg_expr2, fp=figdir + f'umap_exprAvgs-{dsnames[1]}-all.png', **plkwds)
-    
-    # Abstracted graph
-    norm_ov = ['max', 'zs', None][1]
-    cut_ov = 0.
-
-    groupby_var = 'module'
-    obs_labels1, obs_labels2 = adt.obs['celltype'][dpair.obs_ids1], \
-                           adt.obs['celltype'][dpair.obs_ids2]
-    var_labels1, var_labels2 = gadt1.obs[groupby_var], gadt2.obs[groupby_var]
-
-    sp1, sp2 = 'human', 'mouse'
-    g = came.make_abstracted_graph(
-        obs_labels1, obs_labels2,
-        var_labels1, var_labels2,
-        avg_expr1, avg_expr2,
-        df_var_links,
-        tags_obs=(f'{sp1} ', f'{sp2} '),
-        tags_var=(f'{sp1} module ', f'{sp2} module '),
-        cut_ov=cut_ov,
-        norm_mtd_ov=norm_ov,)
-        
-    #visualization
-    fp_abs = figdir + f'abstracted_graph-{groupby_var}-cut{cut_ov}-{norm_ov}.pdf'
-    ax = pl.plot_multipartite_graph(g, edge_scale=10, figsize=(9, 7.5), alpha=0.5, fp=fp_abs)  # nodelist=nodelist,
-
-    ax.figure
-    
-    came.save_pickle(g, resdir + 'abs_graph.pickle')
-    
-    
-    
-    
-
diff --git a/run_came/brain_voxel_sample_human_mouse_16_11.py b/run_came/brain_voxel_sample_human_mouse_16_11.py
deleted file mode 100644
index ba30c9e..0000000
--- a/run_came/brain_voxel_sample_human_mouse_16_11.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Wed May  5 20:08:52 2021
-
-@author: Xingyan Liu
-"""
-import came
-from came import pipeline, pp, pl
-
-import logging
-import os
-import sys
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-from matplotlib import pyplot as plt  # optional
-import seaborn as sns  # optional
-
-import scanpy as sc
-from scipy import sparse
-
-import networkx as nx
-import torch
-
-
-try:
-    import matplotlib as mpl
-    mpl.use('agg')
-except Exception as e:
-    print(f"An error occurred when setting matplotlib backend ({e})")
-
-if __name__ == '__main__':
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s %(filename)s-%(lineno)d-%(funcName)s(): '
-               '%(levelname)s\n %(message)s')
-
-    #came.__test1__(10, batch_size=None, reverse=False)
-    #came.__test2__(10, batch_size=2048)
-    dsnames = ('Baron_human', 'Baron_mouse')  # the dataset names, set by user
-    dsn1, dsn2 = dsnames
-
-    path_rawdata1 = './brain_human_mouse_16_11/human_brain_region_16_sparse.h5ad'
-    path_rawdata2 = './brain_human_mouse_16_11/mouse_brain_region_11_sparse.h5ad'
-    
-    path_varmap = './came/sample_data/gene_matches_human2mouse.csv'
-    path_varmap_1v1 = './came/sample_data/gene_matches_1v1_human2mouse.csv'
-    
-    # ========= load data =========
-    df_varmap = pd.read_csv(path_varmap)
-    df_varmap_1v1 = pd.read_csv(path_varmap_1v1) if path_varmap_1v1 else came.pp.take_1v1_matches(df_varmap)
-
-    adata_raw1 = sc.read_h5ad(path_rawdata1)
-    adata_raw2 = sc.read_h5ad(path_rawdata2)
-    adatas = [adata_raw1, adata_raw2]
-    
-    key_class1 = 'region_name'  # set by user
-    key_class2 = 'region_name'  # set by user
-    
-    ROOT = '/home1/zhangbiao/CAME/brain_human_mouse_16_11/'
-    time_tag = came.make_nowtime_tag()
-    resdir = ROOT +  f'{dsnames}-{time_tag}/'  # set by user
-    figdir = resdir + '/figs/'
-    came.check_dirs(figdir)  # check and make the directory
-    
-    sc.pp.filter_genes(adata_raw1, min_cells=3)
-    sc.pp.filter_genes(adata_raw2, min_cells=3)
-    
-    # Inspect classes
-    if key_class2 is not None:
-        group_counts_ori = pd.concat([
-            pd.value_counts(adata_raw1.obs[key_class1]),
-            pd.value_counts(adata_raw2.obs[key_class2]),
-        ], axis=1, keys=dsnames)
-    else:
-        group_counts_ori = pd.value_counts(adata_raw1.obs[key_class1])
-        
-    print(group_counts_ori)
-        
-        
-    # The numer of training epochs
-    # (a recommended setting is 200-400 for whole-graph training, and 80-200 for sub-graph training)
-    n_epochs = 300
-
-    # The training batch size
-    # When the GPU memory is limited, set 4096 or more if possible.
-    batch_size = 2048
-
-    # The number of epochs to skip for checkpoint backup
-    n_pass = 100
-
-    # Whether to use the single-cell networks
-    use_scnets = True
-
-    # The number of top DEGs to take as the node-features of each cells.
-    # You set it 70-100 for distant species pairs.
-    ntop_deg = 50
-
-    # The number of top DEGs to take as the graph nodes, which can be directly displayed on the UMAP plot.
-    ntop_deg_nodes = 50
-    # The source of the node genes; use both DEGs and HVGs by default
-    node_source = 'deg,hvg'
-
-    # Whether to take into account the non-1v1 variables as the node features.
-    keep_non1v1_feats = True
-    
-    came_inputs, (adata1, adata2) = pipeline.preprocess_unaligned(
-        adatas,
-        key_class=key_class1,
-        use_scnets=use_scnets,
-        ntop_deg=ntop_deg,
-        ntop_deg_nodes=ntop_deg_nodes,
-        node_source=node_source,
-        )
-
-    outputs = pipeline.main_for_unaligned(
-        **came_inputs,
-        df_varmap=df_varmap,
-        df_varmap_1v1=df_varmap_1v1,
-        dataset_names=dsnames,
-        key_class1=key_class1,
-        key_class2=key_class2,
-        do_normalize=True,
-        keep_non1v1_feats=keep_non1v1_feats,
-        n_epochs=n_epochs,
-        resdir=resdir,
-        n_pass=n_pass,
-        batch_size=batch_size,
-        plot_results=True,
-        device_id = '1',
-        )
-        
-    dpair = outputs['dpair']
-    trainer = outputs['trainer']
-    h_dict = outputs['h_dict']
-    out_cell = outputs['out_cell']
-    predictor = outputs['predictor']
-
-
-    obs_ids1, obs_ids2 = dpair.obs_ids1, dpair.obs_ids2
-    obs = dpair.obs
-    classes = predictor.classes
-    
-    # plot figures
-    y_true = obs['celltype'][obs_ids2].values
-    y_pred = obs['predicted'][obs_ids2].values
-    print(y_true.shape)
-    print(y_pred.shape)
-    print(type(y_true[0]))
-    print(type(y_pred[0]))
-
-    #ax, contmat = pl.plot_contingency_mat(y_true, y_pred, norm_axis=1, order_rows=False, order_cols=False,)
-    #pl._save_with_adjust(ax.figure, figdir / 'contingency_mat.png')
-    #ax.figure
-    
-    name_label = 'celltype'
-    cols_anno = ['celltype', 'predicted'][:]
-    df_probs = obs[list(classes)]
-
-
-    gs = pl.wrapper_heatmap_scores(df_probs.iloc[obs_ids2], obs.iloc[obs_ids2], ignore_index=True,
-    col_label='celltype', col_pred='predicted',
-    n_subsample=50,  # sampling 50 cells for each group
-    cmap_heat='magma_r',
-    fp=figdir + 'heatmap_probas.pdf')
-
-    gs.figure
-    
-    
-    
-    # further analysis
-    hidden_list = came.load_hidden_states(resdir + 'hidden_list.h5')
-    #hidden_list  # a list of dicts
-    h_dict = hidden_list[-1]
-    # the last layer of hidden states
-    
-    adt = pp.make_adata(h_dict['cell'], obs=dpair.obs, assparse=False, ignore_index=True)
-    gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    
-    # UMAP of cell embeddings
-    sc.set_figure_params(dpi_save=200)
-
-    sc.pp.neighbors(adt, n_neighbors=15, metric='cosine', use_rep='X')
-    sc.tl.umap(adt)
-    # sc.pl.umap(adt, color=['dataset', 'celltype'], ncols=1)
-
-    ftype = ['.svg', ''][1]
-    sc.pl.umap(adt, color='dataset', save=f'-dataset{ftype}')
-    sc.pl.umap(adt, color='celltype', save=f'-ctype{ftype}')
-    
-    obs_umap = adt.obsm['X_umap']
-    obs['UMAP1'] = obs_umap[:, 0]
-    obs['UMAP2'] = obs_umap[:, 1]
-    obs.to_csv(resdir + 'obs.csv')
-    adt.write(resdir + 'adt_hidden_cell.h5ad')
-    
-    adata1.obsm['X_umap'] = obs_umap[obs_ids1]
-    adata2.obsm['X_umap'] = obs_umap[obs_ids2]
-    
-    # Umap of genes
-    sc.pp.neighbors(gadt, n_neighbors=15, metric='cosine', use_rep='X')
-
-    # gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    sc.tl.umap(gadt)
-    sc.pl.umap(gadt, color='dataset', )
-    
-    # joint gene module extraction
-    sc.tl.leiden(gadt, resolution=.8, key_added='module')
-    sc.pl.umap(gadt, color='module', ncols=1, palette='tab20b')
-    
-    # gadt.obs_names = gadt.obs_names.astype(str)
-    gadt1, gadt2 = pp.bisplit_adata(gadt, 'dataset', dsnames[0], reset_index_by='name')
-
-    color_by = 'module'
-    palette = 'tab20b'
-    sc.pl.umap(gadt1, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-    sc.pl.umap(gadt2, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-           
-    df_var_links = came.weight_linked_vars(
-    gadt.X, dpair._vv_adj, names=dpair.get_vnode_names(),
-    matric='cosine', index_names=dsnames,)
-
-    gadt1.write(resdir + 'adt_hidden_gene1.h5ad')
-    gadt2.write(resdir + 'adt_hidden_gene2.h5ad')
-    
-    # Gene-expression-profiles (for each cell type) on gene UMAP
-    # averaged expressions
-    avg_expr1 = pp.group_mean_adata(adatas[0], groupby=key_class1,features=dpair.vnode_names1, use_raw=True)
-    avg_expr2 = pp.group_mean_adata(adatas[1], groupby=key_class2 if key_class2 else 'predicted',features=dpair.vnode_names2, use_raw=True)
-    
-    plkwds = dict(cmap='RdYlBu_r', vmax=2.5, vmin=-1.5, do_zscore=True,
-              axscale=3, ncols=5, with_cbar=True)
-
-    fig1, axs1 = pl.adata_embed_with_values(gadt1, avg_expr1, fp=figdir + f'umap_exprAvgs-{dsnames[0]}-all.png', **plkwds)
-    fig2, axs2 = pl.adata_embed_with_values(gadt2, avg_expr2, fp=figdir + f'umap_exprAvgs-{dsnames[1]}-all.png', **plkwds)
-    
-    # Abstracted graph
-    norm_ov = ['max', 'zs', None][1]
-    cut_ov = 0.
-
-    groupby_var = 'module'
-    obs_labels1, obs_labels2 = adt.obs['celltype'][dpair.obs_ids1], \
-                           adt.obs['celltype'][dpair.obs_ids2]
-    var_labels1, var_labels2 = gadt1.obs[groupby_var], gadt2.obs[groupby_var]
-
-    sp1, sp2 = 'human', 'mouse'
-    g = came.make_abstracted_graph(
-        obs_labels1, obs_labels2,
-        var_labels1, var_labels2,
-        avg_expr1, avg_expr2,
-        df_var_links,
-        tags_obs=(f'{sp1} ', f'{sp2} '),
-        tags_var=(f'{sp1} module ', f'{sp2} module '),
-        cut_ov=cut_ov,
-        norm_mtd_ov=norm_ov,)
-        
-    #visualization
-    fp_abs = figdir + f'abstracted_graph-{groupby_var}-cut{cut_ov}-{norm_ov}.pdf'
-    ax = pl.plot_multipartite_graph(g, edge_scale=10, figsize=(9, 7.5), alpha=0.5, fp=fp_abs)  # nodelist=nodelist,
-
-    ax.figure
-    
-    came.save_pickle(g, resdir + 'abs_graph.pickle')
-    
-    
-    
-    
-
diff --git a/run_came/brain_voxel_sample_mouse_human.py b/run_came/brain_voxel_sample_mouse_human.py
deleted file mode 100644
index 7bcbfd0..0000000
--- a/run_came/brain_voxel_sample_mouse_human.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Wed May  5 20:08:52 2021
-
-@author: Xingyan Liu
-"""
-import came
-from came import pipeline, pp, pl
-
-import logging
-import os
-import sys
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-from matplotlib import pyplot as plt  # optional
-import seaborn as sns  # optional
-
-import scanpy as sc
-from scipy import sparse
-
-import networkx as nx
-import torch
-
-
-try:
-    import matplotlib as mpl
-    mpl.use('agg')
-except Exception as e:
-    print(f"An error occurred when setting matplotlib backend ({e})")
-
-if __name__ == '__main__':
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s %(filename)s-%(lineno)d-%(funcName)s(): '
-               '%(levelname)s\n %(message)s')
-
-    #came.__test1__(10, batch_size=None, reverse=False)
-    #came.__test2__(10, batch_size=2048)
-    dsnames = ('Baron_mouse', 'Baron_human')  # the dataset names, set by user
-    dsn1, dsn2 = dsnames
-
-    path_rawdata1 = './brain_human_mouse/mouse_brain_region_67_sparse.h5ad'
-    path_rawdata2 = './brain_human_mouse/human_brain_region_88_sparse.h5ad'
-    
-    path_varmap = './came/sample_data/gene_matches_mouse2human.csv'
-    path_varmap_1v1 = './came/sample_data/gene_matches_1v1_mouse2human.csv'
-    
-    # ========= load data =========
-    df_varmap = pd.read_csv(path_varmap)
-    df_varmap_1v1 = pd.read_csv(path_varmap_1v1) if path_varmap_1v1 else came.pp.take_1v1_matches(df_varmap)
-
-    adata_raw1 = sc.read_h5ad(path_rawdata1)
-    adata_raw2 = sc.read_h5ad(path_rawdata2)
-    adatas = [adata_raw1, adata_raw2]
-    
-    key_class1 = 'region_name'  # set by user
-    key_class2 = 'region_name'  # set by user
-    
-    ROOT = '/home1/zhangbiao/CAME/brain_human_mouse/'
-    time_tag = came.make_nowtime_tag()
-    resdir = ROOT +  f'{dsnames}-{time_tag}/'  # set by user
-    figdir = resdir + '/figs/'
-    came.check_dirs(figdir)  # check and make the directory
-    
-    sc.pp.filter_genes(adata_raw1, min_cells=3)
-    sc.pp.filter_genes(adata_raw2, min_cells=3)
-    
-    # Inspect classes
-    if key_class2 is not None:
-        group_counts_ori = pd.concat([
-            pd.value_counts(adata_raw1.obs[key_class1]),
-            pd.value_counts(adata_raw2.obs[key_class2]),
-        ], axis=1, keys=dsnames)
-    else:
-        group_counts_ori = pd.value_counts(adata_raw1.obs[key_class1])
-        
-    print(group_counts_ori)
-        
-        
-    # The numer of training epochs
-    # (a recommended setting is 200-400 for whole-graph training, and 80-200 for sub-graph training)
-    n_epochs = 400
-
-    # The training batch size
-    # When the GPU memory is limited, set 4096 or more if possible.
-    batch_size = 4096
-
-    # The number of epochs to skip for checkpoint backup
-    n_pass = 100
-
-    # Whether to use the single-cell networks
-    use_scnets = True
-
-    # The number of top DEGs to take as the node-features of each cells.
-    # You set it 70-100 for distant species pairs.
-    ntop_deg = 50
-
-    # The number of top DEGs to take as the graph nodes, which can be directly displayed on the UMAP plot.
-    ntop_deg_nodes = 50
-    # The source of the node genes; use both DEGs and HVGs by default
-    node_source = 'deg,hvg'
-
-    # Whether to take into account the non-1v1 variables as the node features.
-    keep_non1v1_feats = True
-    
-    came_inputs, (adata1, adata2) = pipeline.preprocess_unaligned(
-        adatas,
-        key_class=key_class1,
-        use_scnets=use_scnets,
-        ntop_deg=ntop_deg,
-        ntop_deg_nodes=ntop_deg_nodes,
-        node_source=node_source,
-        )
-
-    outputs = pipeline.main_for_unaligned(
-        **came_inputs,
-        df_varmap=df_varmap,
-        df_varmap_1v1=df_varmap_1v1,
-        dataset_names=dsnames,
-        key_class1=key_class1,
-        key_class2=key_class2,
-        do_normalize=True,
-        keep_non1v1_feats=keep_non1v1_feats,
-        n_epochs=n_epochs,
-        resdir=resdir,
-        n_pass=n_pass,
-        batch_size=batch_size,
-        plot_results=True,
-        device_id = '1',
-        )
-        
-    dpair = outputs['dpair']
-    trainer = outputs['trainer']
-    h_dict = outputs['h_dict']
-    out_cell = outputs['out_cell']
-    predictor = outputs['predictor']
-
-
-    obs_ids1, obs_ids2 = dpair.obs_ids1, dpair.obs_ids2
-    obs = dpair.obs
-    classes = predictor.classes
-    
-    # plot figures
-    y_true = obs['celltype'][obs_ids2].values
-    y_pred = obs['predicted'][obs_ids2].values
-    print(y_true.shape)
-    print(y_pred.shape)
-    print(type(y_true[0]))
-    print(type(y_pred[0]))
-
-    #ax, contmat = pl.plot_contingency_mat(y_true, y_pred, norm_axis=1, order_rows=False, order_cols=False,)
-    #pl._save_with_adjust(ax.figure, figdir / 'contingency_mat.png')
-    #ax.figure
-    
-    name_label = 'celltype'
-    cols_anno = ['celltype', 'predicted'][:]
-    df_probs = obs[list(classes)]
-
-
-    gs = pl.wrapper_heatmap_scores(df_probs.iloc[obs_ids2], obs.iloc[obs_ids2], ignore_index=True,
-    col_label='celltype', col_pred='predicted',
-    n_subsample=50,  # sampling 50 cells for each group
-    cmap_heat='magma_r',
-    fp=figdir + 'heatmap_probas.pdf')
-
-    gs.figure
-    
-    
-    
-    # further analysis
-    hidden_list = came.load_hidden_states(resdir + 'hidden_list.h5')
-    #hidden_list  # a list of dicts
-    h_dict = hidden_list[-1]
-    # the last layer of hidden states
-    
-    adt = pp.make_adata(h_dict['cell'], obs=dpair.obs, assparse=False, ignore_index=True)
-    gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    
-    # UMAP of cell embeddings
-    sc.set_figure_params(dpi_save=200)
-
-    sc.pp.neighbors(adt, n_neighbors=15, metric='cosine', use_rep='X')
-    sc.tl.umap(adt)
-    # sc.pl.umap(adt, color=['dataset', 'celltype'], ncols=1)
-
-    ftype = ['.svg', ''][1]
-    sc.pl.umap(adt, color='dataset', save=f'-dataset{ftype}')
-    sc.pl.umap(adt, color='celltype', save=f'-ctype{ftype}')
-    
-    obs_umap = adt.obsm['X_umap']
-    obs['UMAP1'] = obs_umap[:, 0]
-    obs['UMAP2'] = obs_umap[:, 1]
-    obs.to_csv(resdir + 'obs.csv')
-    adt.write(resdir + 'adt_hidden_cell.h5ad')
-    
-    adata1.obsm['X_umap'] = obs_umap[obs_ids1]
-    adata2.obsm['X_umap'] = obs_umap[obs_ids2]
-    
-    # Umap of genes
-    sc.pp.neighbors(gadt, n_neighbors=15, metric='cosine', use_rep='X')
-
-    # gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    sc.tl.umap(gadt)
-    sc.pl.umap(gadt, color='dataset', )
-    
-    # joint gene module extraction
-    sc.tl.leiden(gadt, resolution=.8, key_added='module')
-    sc.pl.umap(gadt, color='module', ncols=1, palette='tab20b')
-    
-    # gadt.obs_names = gadt.obs_names.astype(str)
-    gadt1, gadt2 = pp.bisplit_adata(gadt, 'dataset', dsnames[0], reset_index_by='name')
-
-    color_by = 'module'
-    palette = 'tab20b'
-    sc.pl.umap(gadt1, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-    sc.pl.umap(gadt2, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-           
-    df_var_links = came.weight_linked_vars(
-    gadt.X, dpair._vv_adj, names=dpair.get_vnode_names(),
-    matric='cosine', index_names=dsnames,)
-
-    gadt1.write(resdir + 'adt_hidden_gene1.h5ad')
-    gadt2.write(resdir + 'adt_hidden_gene2.h5ad')
-    
-    # Gene-expression-profiles (for each cell type) on gene UMAP
-    # averaged expressions
-    avg_expr1 = pp.group_mean_adata(adatas[0], groupby=key_class1,features=dpair.vnode_names1, use_raw=True)
-    avg_expr2 = pp.group_mean_adata(adatas[1], groupby=key_class2 if key_class2 else 'predicted',features=dpair.vnode_names2, use_raw=True)
-    
-    plkwds = dict(cmap='RdYlBu_r', vmax=2.5, vmin=-1.5, do_zscore=True,
-              axscale=3, ncols=5, with_cbar=True)
-
-    fig1, axs1 = pl.adata_embed_with_values(gadt1, avg_expr1, fp=figdir + f'umap_exprAvgs-{dsnames[0]}-all.png', **plkwds)
-    fig2, axs2 = pl.adata_embed_with_values(gadt2, avg_expr2, fp=figdir + f'umap_exprAvgs-{dsnames[1]}-all.png', **plkwds)
-    
-    # Abstracted graph
-    norm_ov = ['max', 'zs', None][1]
-    cut_ov = 0.
-
-    groupby_var = 'module'
-    obs_labels1, obs_labels2 = adt.obs['celltype'][dpair.obs_ids1], \
-                           adt.obs['celltype'][dpair.obs_ids2]
-    var_labels1, var_labels2 = gadt1.obs[groupby_var], gadt2.obs[groupby_var]
-
-    sp1, sp2 = 'mouse', 'human'
-    g = came.make_abstracted_graph(
-        obs_labels1, obs_labels2,
-        var_labels1, var_labels2,
-        avg_expr1, avg_expr2,
-        df_var_links,
-        tags_obs=(f'{sp1} ', f'{sp2} '),
-        tags_var=(f'{sp1} module ', f'{sp2} module '),
-        cut_ov=cut_ov,
-        norm_mtd_ov=norm_ov,)
-        
-    #visualization
-    fp_abs = figdir + f'abstracted_graph-{groupby_var}-cut{cut_ov}-{norm_ov}.pdf'
-    ax = pl.plot_multipartite_graph(g, edge_scale=10, figsize=(9, 7.5), alpha=0.5, fp=fp_abs)  # nodelist=nodelist,
-
-    ax.figure
-    
-    came.save_pickle(g, resdir + 'abs_graph.pickle')
-    
-    
-    
-    
-
diff --git a/run_came/brain_voxel_sample_mouse_human_16_11.py b/run_came/brain_voxel_sample_mouse_human_16_11.py
deleted file mode 100644
index aeca354..0000000
--- a/run_came/brain_voxel_sample_mouse_human_16_11.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Wed May  5 20:08:52 2021
-
-@author: Xingyan Liu
-"""
-import came
-from came import pipeline, pp, pl
-
-import logging
-import os
-import sys
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-from matplotlib import pyplot as plt  # optional
-import seaborn as sns  # optional
-
-import scanpy as sc
-from scipy import sparse
-
-import networkx as nx
-import torch
-
-
-try:
-    import matplotlib as mpl
-    mpl.use('agg')
-except Exception as e:
-    print(f"An error occurred when setting matplotlib backend ({e})")
-
-if __name__ == '__main__':
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s %(filename)s-%(lineno)d-%(funcName)s(): '
-               '%(levelname)s\n %(message)s')
-
-    #came.__test1__(10, batch_size=None, reverse=False)
-    #came.__test2__(10, batch_size=2048)
-    dsnames = ('Baron_mouse', 'Baron_human')  # the dataset names, set by user
-    dsn1, dsn2 = dsnames
-
-    path_rawdata1 = './brain_human_mouse_16_11/mouse_brain_region_11_sparse.h5ad'
-    path_rawdata2 = './brain_human_mouse_16_11/human_brain_region_16_sparse.h5ad'
-    
-    path_varmap = './came/sample_data/gene_matches_mouse2human.csv'
-    path_varmap_1v1 = './came/sample_data/gene_matches_1v1_mouse2human.csv'
-    
-    # ========= load data =========
-    df_varmap = pd.read_csv(path_varmap)
-    df_varmap_1v1 = pd.read_csv(path_varmap_1v1) if path_varmap_1v1 else came.pp.take_1v1_matches(df_varmap)
-
-    adata_raw1 = sc.read_h5ad(path_rawdata1)
-    adata_raw2 = sc.read_h5ad(path_rawdata2)
-    adatas = [adata_raw1, adata_raw2]
-    
-    key_class1 = 'region_name'  # set by user
-    key_class2 = 'region_name'  # set by user
-    
-    ROOT = '/home1/zhangbiao/CAME/brain_human_mouse_16_11/'
-    time_tag = came.make_nowtime_tag()
-    resdir = ROOT +  f'{dsnames}-{time_tag}/'  # set by user
-    figdir = resdir + '/figs/'
-    came.check_dirs(figdir)  # check and make the directory
-    
-    sc.pp.filter_genes(adata_raw1, min_cells=3)
-    sc.pp.filter_genes(adata_raw2, min_cells=3)
-    
-    # Inspect classes
-    if key_class2 is not None:
-        group_counts_ori = pd.concat([
-            pd.value_counts(adata_raw1.obs[key_class1]),
-            pd.value_counts(adata_raw2.obs[key_class2]),
-        ], axis=1, keys=dsnames)
-    else:
-        group_counts_ori = pd.value_counts(adata_raw1.obs[key_class1])
-        
-    print(group_counts_ori)
-        
-        
-    # The numer of training epochs
-    # (a recommended setting is 200-400 for whole-graph training, and 80-200 for sub-graph training)
-    n_epochs = 300
-
-    # The training batch size
-    # When the GPU memory is limited, set 4096 or more if possible.
-    batch_size = 4096
-
-    # The number of epochs to skip for checkpoint backup
-    n_pass = 100
-
-    # Whether to use the single-cell networks
-    use_scnets = True
-
-    # The number of top DEGs to take as the node-features of each cells.
-    # You set it 70-100 for distant species pairs.
-    ntop_deg = 50
-
-    # The number of top DEGs to take as the graph nodes, which can be directly displayed on the UMAP plot.
-    ntop_deg_nodes = 50
-    # The source of the node genes; use both DEGs and HVGs by default
-    node_source = 'deg,hvg'
-
-    # Whether to take into account the non-1v1 variables as the node features.
-    keep_non1v1_feats = True
-    
-    came_inputs, (adata1, adata2) = pipeline.preprocess_unaligned(
-        adatas,
-        key_class=key_class1,
-        use_scnets=use_scnets,
-        ntop_deg=ntop_deg,
-        ntop_deg_nodes=ntop_deg_nodes,
-        node_source=node_source,
-        )
-
-    outputs = pipeline.main_for_unaligned(
-        **came_inputs,
-        df_varmap=df_varmap,
-        df_varmap_1v1=df_varmap_1v1,
-        dataset_names=dsnames,
-        key_class1=key_class1,
-        key_class2=key_class2,
-        do_normalize=True,
-        keep_non1v1_feats=keep_non1v1_feats,
-        n_epochs=n_epochs,
-        resdir=resdir,
-        n_pass=n_pass,
-        batch_size=batch_size,
-        plot_results=True,
-        device_id = '1',
-        )
-        
-    dpair = outputs['dpair']
-    trainer = outputs['trainer']
-    h_dict = outputs['h_dict']
-    out_cell = outputs['out_cell']
-    predictor = outputs['predictor']
-
-
-    obs_ids1, obs_ids2 = dpair.obs_ids1, dpair.obs_ids2
-    obs = dpair.obs
-    classes = predictor.classes
-    
-    # plot figures
-    y_true = obs['celltype'][obs_ids2].values
-    y_pred = obs['predicted'][obs_ids2].values
-    print(y_true.shape)
-    print(y_pred.shape)
-    print(type(y_true[0]))
-    print(type(y_pred[0]))
-
-    #ax, contmat = pl.plot_contingency_mat(y_true, y_pred, norm_axis=1, order_rows=False, order_cols=False,)
-    #pl._save_with_adjust(ax.figure, figdir / 'contingency_mat.png')
-    #ax.figure
-    
-    name_label = 'celltype'
-    cols_anno = ['celltype', 'predicted'][:]
-    df_probs = obs[list(classes)]
-
-
-    gs = pl.wrapper_heatmap_scores(df_probs.iloc[obs_ids2], obs.iloc[obs_ids2], ignore_index=True,
-    col_label='celltype', col_pred='predicted',
-    n_subsample=50,  # sampling 50 cells for each group
-    cmap_heat='magma_r',
-    fp=figdir + 'heatmap_probas.pdf')
-
-    gs.figure
-    
-    
-    
-    # further analysis
-    hidden_list = came.load_hidden_states(resdir + 'hidden_list.h5')
-    #hidden_list  # a list of dicts
-    h_dict = hidden_list[-1]
-    # the last layer of hidden states
-    
-    adt = pp.make_adata(h_dict['cell'], obs=dpair.obs, assparse=False, ignore_index=True)
-    gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    
-    # UMAP of cell embeddings
-    sc.set_figure_params(dpi_save=200)
-
-    sc.pp.neighbors(adt, n_neighbors=15, metric='cosine', use_rep='X')
-    sc.tl.umap(adt)
-    # sc.pl.umap(adt, color=['dataset', 'celltype'], ncols=1)
-
-    ftype = ['.svg', ''][1]
-    sc.pl.umap(adt, color='dataset', save=f'-dataset{ftype}')
-    sc.pl.umap(adt, color='celltype', save=f'-ctype{ftype}')
-    
-    obs_umap = adt.obsm['X_umap']
-    obs['UMAP1'] = obs_umap[:, 0]
-    obs['UMAP2'] = obs_umap[:, 1]
-    obs.to_csv(resdir + 'obs.csv')
-    adt.write(resdir + 'adt_hidden_cell.h5ad')
-    
-    adata1.obsm['X_umap'] = obs_umap[obs_ids1]
-    adata2.obsm['X_umap'] = obs_umap[obs_ids2]
-    
-    # Umap of genes
-    sc.pp.neighbors(gadt, n_neighbors=15, metric='cosine', use_rep='X')
-
-    # gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    sc.tl.umap(gadt)
-    sc.pl.umap(gadt, color='dataset', )
-    
-    # joint gene module extraction
-    sc.tl.leiden(gadt, resolution=.8, key_added='module')
-    sc.pl.umap(gadt, color='module', ncols=1, palette='tab20b')
-    
-    # gadt.obs_names = gadt.obs_names.astype(str)
-    gadt1, gadt2 = pp.bisplit_adata(gadt, 'dataset', dsnames[0], reset_index_by='name')
-
-    color_by = 'module'
-    palette = 'tab20b'
-    sc.pl.umap(gadt1, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-    sc.pl.umap(gadt2, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-           
-    df_var_links = came.weight_linked_vars(
-    gadt.X, dpair._vv_adj, names=dpair.get_vnode_names(),
-    matric='cosine', index_names=dsnames,)
-
-    gadt1.write(resdir + 'adt_hidden_gene1.h5ad')
-    gadt2.write(resdir + 'adt_hidden_gene2.h5ad')
-    
-    # Gene-expression-profiles (for each cell type) on gene UMAP
-    # averaged expressions
-    avg_expr1 = pp.group_mean_adata(adatas[0], groupby=key_class1,features=dpair.vnode_names1, use_raw=True)
-    avg_expr2 = pp.group_mean_adata(adatas[1], groupby=key_class2 if key_class2 else 'predicted',features=dpair.vnode_names2, use_raw=True)
-    
-    plkwds = dict(cmap='RdYlBu_r', vmax=2.5, vmin=-1.5, do_zscore=True,
-              axscale=3, ncols=5, with_cbar=True)
-
-    fig1, axs1 = pl.adata_embed_with_values(gadt1, avg_expr1, fp=figdir + f'umap_exprAvgs-{dsnames[0]}-all.png', **plkwds)
-    fig2, axs2 = pl.adata_embed_with_values(gadt2, avg_expr2, fp=figdir + f'umap_exprAvgs-{dsnames[1]}-all.png', **plkwds)
-    
-    # Abstracted graph
-    norm_ov = ['max', 'zs', None][1]
-    cut_ov = 0.
-
-    groupby_var = 'module'
-    obs_labels1, obs_labels2 = adt.obs['celltype'][dpair.obs_ids1], \
-                           adt.obs['celltype'][dpair.obs_ids2]
-    var_labels1, var_labels2 = gadt1.obs[groupby_var], gadt2.obs[groupby_var]
-
-    sp1, sp2 = 'human', 'mouse'
-    g = came.make_abstracted_graph(
-        obs_labels1, obs_labels2,
-        var_labels1, var_labels2,
-        avg_expr1, avg_expr2,
-        df_var_links,
-        tags_obs=(f'{sp1} ', f'{sp2} '),
-        tags_var=(f'{sp1} module ', f'{sp2} module '),
-        cut_ov=cut_ov,
-        norm_mtd_ov=norm_ov,)
-        
-    #visualization
-    fp_abs = figdir + f'abstracted_graph-{groupby_var}-cut{cut_ov}-{norm_ov}.pdf'
-    ax = pl.plot_multipartite_graph(g, edge_scale=10, figsize=(9, 7.5), alpha=0.5, fp=fp_abs)  # nodelist=nodelist,
-
-    ax.figure
-    
-    came.save_pickle(g, resdir + 'abs_graph.pickle')
-    
-    
-    
-    
-
diff --git a/run_came/brain_voxel_sample_mouse_human_16_11_no_threshold.py b/run_came/brain_voxel_sample_mouse_human_16_11_no_threshold.py
deleted file mode 100644
index eb3dc24..0000000
--- a/run_came/brain_voxel_sample_mouse_human_16_11_no_threshold.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Wed May  5 20:08:52 2021
-
-@author: Xingyan Liu
-"""
-import came
-from came import pipeline, pp, pl
-
-import logging
-import os
-import sys
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-from matplotlib import pyplot as plt  # optional
-import seaborn as sns  # optional
-
-import scanpy as sc
-from scipy import sparse
-
-import networkx as nx
-import torch
-
-
-try:
-    import matplotlib as mpl
-    mpl.use('agg')
-except Exception as e:
-    print(f"An error occurred when setting matplotlib backend ({e})")
-
-if __name__ == '__main__':
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s %(filename)s-%(lineno)d-%(funcName)s(): '
-               '%(levelname)s\n %(message)s')
-
-    #came.__test1__(10, batch_size=None, reverse=False)
-    #came.__test2__(10, batch_size=2048)
-    dsnames = ('Baron_mouse', 'Baron_human')  # the dataset names, set by user
-    dsn1, dsn2 = dsnames
-
-    path_rawdata1 = '../Brain_ST_human_mouse/data/mouse_brain_region_11_sparse_no_threshold.h5ad'
-    path_rawdata2 = './brain_human_mouse_16_11/human_brain_region_16_sparse.h5ad'
-    
-    path_varmap = './came/sample_data/gene_matches_mouse2human.csv'
-    path_varmap_1v1 = './came/sample_data/gene_matches_1v1_mouse2human.csv'
-    
-    # ========= load data =========
-    df_varmap = pd.read_csv(path_varmap)
-    df_varmap_1v1 = pd.read_csv(path_varmap_1v1) if path_varmap_1v1 else came.pp.take_1v1_matches(df_varmap)
-
-    adata_raw1 = sc.read_h5ad(path_rawdata1)
-    adata_raw2 = sc.read_h5ad(path_rawdata2)
-    adatas = [adata_raw1, adata_raw2]
-    
-    key_class1 = 'region_name'  # set by user
-    key_class2 = 'region_name'  # set by user
-    
-    ROOT = '/home1/zhangbiao/CAME/brain_human_mouse_16_11/'
-    time_tag = came.make_nowtime_tag()
-    resdir = ROOT +  f'{dsnames}-{time_tag}/'  # set by user
-    figdir = resdir + '/figs/'
-    came.check_dirs(figdir)  # check and make the directory
-    
-    sc.pp.filter_genes(adata_raw1, min_cells=3)
-    sc.pp.filter_genes(adata_raw2, min_cells=3)
-    
-    # Inspect classes
-    if key_class2 is not None:
-        group_counts_ori = pd.concat([
-            pd.value_counts(adata_raw1.obs[key_class1]),
-            pd.value_counts(adata_raw2.obs[key_class2]),
-        ], axis=1, keys=dsnames)
-    else:
-        group_counts_ori = pd.value_counts(adata_raw1.obs[key_class1])
-        
-    print(group_counts_ori)
-        
-        
-    # The numer of training epochs
-    # (a recommended setting is 200-400 for whole-graph training, and 80-200 for sub-graph training)
-    n_epochs = 300
-
-    # The training batch size
-    # When the GPU memory is limited, set 4096 or more if possible.
-    batch_size = 4096
-
-    # The number of epochs to skip for checkpoint backup
-    n_pass = 100
-
-    # Whether to use the single-cell networks
-    use_scnets = True
-
-    # The number of top DEGs to take as the node-features of each cells.
-    # You set it 70-100 for distant species pairs.
-    ntop_deg = 50
-
-    # The number of top DEGs to take as the graph nodes, which can be directly displayed on the UMAP plot.
-    ntop_deg_nodes = 50
-    # The source of the node genes; use both DEGs and HVGs by default
-    node_source = 'deg,hvg'
-
-    # Whether to take into account the non-1v1 variables as the node features.
-    keep_non1v1_feats = True
-    
-    came_inputs, (adata1, adata2) = pipeline.preprocess_unaligned(
-        adatas,
-        key_class=key_class1,
-        use_scnets=use_scnets,
-        ntop_deg=ntop_deg,
-        ntop_deg_nodes=ntop_deg_nodes,
-        node_source=node_source,
-        )
-
-    outputs = pipeline.main_for_unaligned(
-        **came_inputs,
-        df_varmap=df_varmap,
-        df_varmap_1v1=df_varmap_1v1,
-        dataset_names=dsnames,
-        key_class1=key_class1,
-        key_class2=key_class2,
-        do_normalize=True,
-        keep_non1v1_feats=keep_non1v1_feats,
-        n_epochs=n_epochs,
-        resdir=resdir,
-        n_pass=n_pass,
-        batch_size=batch_size,
-        plot_results=True,
-        device_id = '1',
-        )
-        
-    dpair = outputs['dpair']
-    trainer = outputs['trainer']
-    h_dict = outputs['h_dict']
-    out_cell = outputs['out_cell']
-    predictor = outputs['predictor']
-
-
-    obs_ids1, obs_ids2 = dpair.obs_ids1, dpair.obs_ids2
-    obs = dpair.obs
-    classes = predictor.classes
-    
-    # plot figures
-    y_true = obs['celltype'][obs_ids2].values
-    y_pred = obs['predicted'][obs_ids2].values
-    print(y_true.shape)
-    print(y_pred.shape)
-    print(type(y_true[0]))
-    print(type(y_pred[0]))
-
-    #ax, contmat = pl.plot_contingency_mat(y_true, y_pred, norm_axis=1, order_rows=False, order_cols=False,)
-    #pl._save_with_adjust(ax.figure, figdir / 'contingency_mat.png')
-    #ax.figure
-    
-    name_label = 'celltype'
-    cols_anno = ['celltype', 'predicted'][:]
-    df_probs = obs[list(classes)]
-
-
-    gs = pl.wrapper_heatmap_scores(df_probs.iloc[obs_ids2], obs.iloc[obs_ids2], ignore_index=True,
-    col_label='celltype', col_pred='predicted',
-    n_subsample=50,  # sampling 50 cells for each group
-    cmap_heat='magma_r',
-    fp=figdir + 'heatmap_probas.pdf')
-
-    gs.figure
-    
-    
-    
-    # further analysis
-    hidden_list = came.load_hidden_states(resdir + 'hidden_list.h5')
-    #hidden_list  # a list of dicts
-    h_dict = hidden_list[-1]
-    # the last layer of hidden states
-    
-    adt = pp.make_adata(h_dict['cell'], obs=dpair.obs, assparse=False, ignore_index=True)
-    gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    
-    # UMAP of cell embeddings
-    sc.set_figure_params(dpi_save=200)
-
-    sc.pp.neighbors(adt, n_neighbors=15, metric='cosine', use_rep='X')
-    sc.tl.umap(adt)
-    # sc.pl.umap(adt, color=['dataset', 'celltype'], ncols=1)
-
-    ftype = ['.svg', ''][1]
-    sc.pl.umap(adt, color='dataset', save=f'-dataset{ftype}')
-    sc.pl.umap(adt, color='celltype', save=f'-ctype{ftype}')
-    
-    obs_umap = adt.obsm['X_umap']
-    obs['UMAP1'] = obs_umap[:, 0]
-    obs['UMAP2'] = obs_umap[:, 1]
-    obs.to_csv(resdir + 'obs.csv')
-    adt.write(resdir + 'adt_hidden_cell.h5ad')
-    
-    adata1.obsm['X_umap'] = obs_umap[obs_ids1]
-    adata2.obsm['X_umap'] = obs_umap[obs_ids2]
-    
-    # Umap of genes
-    sc.pp.neighbors(gadt, n_neighbors=15, metric='cosine', use_rep='X')
-
-    # gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    sc.tl.umap(gadt)
-    sc.pl.umap(gadt, color='dataset', )
-    
-    # joint gene module extraction
-    sc.tl.leiden(gadt, resolution=.8, key_added='module')
-    sc.pl.umap(gadt, color='module', ncols=1, palette='tab20b')
-    
-    # gadt.obs_names = gadt.obs_names.astype(str)
-    gadt1, gadt2 = pp.bisplit_adata(gadt, 'dataset', dsnames[0], reset_index_by='name')
-
-    color_by = 'module'
-    palette = 'tab20b'
-    sc.pl.umap(gadt1, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-    sc.pl.umap(gadt2, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-           
-    df_var_links = came.weight_linked_vars(
-    gadt.X, dpair._vv_adj, names=dpair.get_vnode_names(),
-    matric='cosine', index_names=dsnames,)
-
-    gadt1.write(resdir + 'adt_hidden_gene1.h5ad')
-    gadt2.write(resdir + 'adt_hidden_gene2.h5ad')
-    
-    # Gene-expression-profiles (for each cell type) on gene UMAP
-    # averaged expressions
-    avg_expr1 = pp.group_mean_adata(adatas[0], groupby=key_class1,features=dpair.vnode_names1, use_raw=True)
-    avg_expr2 = pp.group_mean_adata(adatas[1], groupby=key_class2 if key_class2 else 'predicted',features=dpair.vnode_names2, use_raw=True)
-    
-    plkwds = dict(cmap='RdYlBu_r', vmax=2.5, vmin=-1.5, do_zscore=True,
-              axscale=3, ncols=5, with_cbar=True)
-
-    fig1, axs1 = pl.adata_embed_with_values(gadt1, avg_expr1, fp=figdir + f'umap_exprAvgs-{dsnames[0]}-all.png', **plkwds)
-    fig2, axs2 = pl.adata_embed_with_values(gadt2, avg_expr2, fp=figdir + f'umap_exprAvgs-{dsnames[1]}-all.png', **plkwds)
-    
-    # Abstracted graph
-    norm_ov = ['max', 'zs', None][1]
-    cut_ov = 0.
-
-    groupby_var = 'module'
-    obs_labels1, obs_labels2 = adt.obs['celltype'][dpair.obs_ids1], \
-                           adt.obs['celltype'][dpair.obs_ids2]
-    var_labels1, var_labels2 = gadt1.obs[groupby_var], gadt2.obs[groupby_var]
-
-    sp1, sp2 = 'human', 'mouse'
-    g = came.make_abstracted_graph(
-        obs_labels1, obs_labels2,
-        var_labels1, var_labels2,
-        avg_expr1, avg_expr2,
-        df_var_links,
-        tags_obs=(f'{sp1} ', f'{sp2} '),
-        tags_var=(f'{sp1} module ', f'{sp2} module '),
-        cut_ov=cut_ov,
-        norm_mtd_ov=norm_ov,)
-        
-    #visualization
-    fp_abs = figdir + f'abstracted_graph-{groupby_var}-cut{cut_ov}-{norm_ov}.pdf'
-    ax = pl.plot_multipartite_graph(g, edge_scale=10, figsize=(9, 7.5), alpha=0.5, fp=fp_abs)  # nodelist=nodelist,
-
-    ax.figure
-    
-    came.save_pickle(g, resdir + 'abs_graph.pickle')
-    
-    
-    
-    
-
diff --git a/run_came/brain_voxel_sample_mouse_human_no_threshold.py b/run_came/brain_voxel_sample_mouse_human_no_threshold.py
deleted file mode 100644
index 8d1fbf8..0000000
--- a/run_came/brain_voxel_sample_mouse_human_no_threshold.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Wed May  5 20:08:52 2021
-
-@author: Xingyan Liu
-"""
-import came
-from came import pipeline, pp, pl
-
-import logging
-import os
-import sys
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-from matplotlib import pyplot as plt  # optional
-import seaborn as sns  # optional
-
-import scanpy as sc
-from scipy import sparse
-
-import networkx as nx
-import torch
-
-
-try:
-    import matplotlib as mpl
-    mpl.use('agg')
-except Exception as e:
-    print(f"An error occurred when setting matplotlib backend ({e})")
-
-if __name__ == '__main__':
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s %(filename)s-%(lineno)d-%(funcName)s(): '
-               '%(levelname)s\n %(message)s')
-
-    #came.__test1__(10, batch_size=None, reverse=False)
-    #came.__test2__(10, batch_size=2048)
-    dsnames = ('Baron_mouse', 'Baron_human')  # the dataset names, set by user
-    dsn1, dsn2 = dsnames
-
-    path_rawdata1 = '../Brain_ST_human_mouse/data/mouse_brain_region_67_sparse_no_threshold.h5ad'
-    path_rawdata2 = './brain_human_mouse/human_brain_region_88_sparse.h5ad'
-    
-    path_varmap = './came/sample_data/gene_matches_mouse2human.csv'
-    path_varmap_1v1 = './came/sample_data/gene_matches_1v1_mouse2human.csv'
-    
-    # ========= load data =========
-    df_varmap = pd.read_csv(path_varmap)
-    df_varmap_1v1 = pd.read_csv(path_varmap_1v1) if path_varmap_1v1 else came.pp.take_1v1_matches(df_varmap)
-
-    adata_raw1 = sc.read_h5ad(path_rawdata1)
-    adata_raw2 = sc.read_h5ad(path_rawdata2)
-    adatas = [adata_raw1, adata_raw2]
-    
-    key_class1 = 'region_name'  # set by user
-    key_class2 = 'region_name'  # set by user
-    
-    ROOT = '/home1/zhangbiao/CAME/brain_mouse_human_no_threshold_500/'
-    time_tag = came.make_nowtime_tag()
-    resdir = ROOT +  f'{dsnames}-{time_tag}/'  # set by user
-    figdir = resdir + '/figs/'
-    came.check_dirs(figdir)  # check and make the directory
-    
-    sc.pp.filter_genes(adata_raw1, min_cells=1)
-    sc.pp.filter_genes(adata_raw2, min_cells=1)
-    
-    # Inspect classes
-    if key_class2 is not None:
-        group_counts_ori = pd.concat([
-            pd.value_counts(adata_raw1.obs[key_class1]),
-            pd.value_counts(adata_raw2.obs[key_class2]),
-        ], axis=1, keys=dsnames)
-    else:
-        group_counts_ori = pd.value_counts(adata_raw1.obs[key_class1])
-        
-    print(group_counts_ori)
-        
-        
-    # The numer of training epochs
-    # (a recommended setting is 200-400 for whole-graph training, and 80-200 for sub-graph training)
-    n_epochs = 400
-
-    # The training batch size
-    # When the GPU memory is limited, set 4096 or more if possible.
-    batch_size = 4096
-
-    # The number of epochs to skip for checkpoint backup
-    n_pass = 100
-
-    # Whether to use the single-cell networks
-    use_scnets = True
-
-    # The number of top DEGs to take as the node-features of each cells.
-    # You set it 70-100 for distant species pairs.
-    ntop_deg = 70
-
-    # The number of top DEGs to take as the graph nodes, which can be directly displayed on the UMAP plot.
-    ntop_deg_nodes = 50
-    # The source of the node genes; use both DEGs and HVGs by default
-    node_source = 'deg,hvg'
-
-    # Whether to take into account the non-1v1 variables as the node features.
-    keep_non1v1_feats = True
-    
-    came_inputs, (adata1, adata2) = pipeline.preprocess_unaligned(
-        adatas,
-        key_class=key_class1,
-        use_scnets=use_scnets,
-        ntop_deg=ntop_deg,
-        ntop_deg_nodes=ntop_deg_nodes,
-        node_source=node_source,
-        )
-
-    outputs = pipeline.main_for_unaligned(
-        **came_inputs,
-        df_varmap=df_varmap,
-        df_varmap_1v1=df_varmap_1v1,
-        dataset_names=dsnames,
-        key_class1=key_class1,
-        key_class2=key_class2,
-        do_normalize=True,
-        keep_non1v1_feats=keep_non1v1_feats,
-        n_epochs=n_epochs,
-        resdir=resdir,
-        n_pass=n_pass,
-        batch_size=batch_size,
-        plot_results=True,
-        device_id = '1',
-        )
-        
-    dpair = outputs['dpair']
-    trainer = outputs['trainer']
-    h_dict = outputs['h_dict']
-    out_cell = outputs['out_cell']
-    predictor = outputs['predictor']
-
-
-    obs_ids1, obs_ids2 = dpair.obs_ids1, dpair.obs_ids2
-    obs = dpair.obs
-    classes = predictor.classes
-    
-    # plot figures
-    y_true = obs['celltype'][obs_ids2].values
-    y_pred = obs['predicted'][obs_ids2].values
-    print(y_true.shape)
-    print(y_pred.shape)
-    print(type(y_true[0]))
-    print(type(y_pred[0]))
-
-    #ax, contmat = pl.plot_contingency_mat(y_true, y_pred, norm_axis=1, order_rows=False, order_cols=False,)
-    #pl._save_with_adjust(ax.figure, figdir / 'contingency_mat.png')
-    #ax.figure
-    
-    name_label = 'celltype'
-    cols_anno = ['celltype', 'predicted'][:]
-    df_probs = obs[list(classes)]
-
-
-    gs = pl.wrapper_heatmap_scores(df_probs.iloc[obs_ids2], obs.iloc[obs_ids2], ignore_index=True,
-    col_label='celltype', col_pred='predicted',
-    n_subsample=50,  # sampling 50 cells for each group
-    cmap_heat='magma_r',
-    fp=figdir + 'heatmap_probas.pdf')
-
-    gs.figure
-    
-    
-    
-    # further analysis
-    hidden_list = came.load_hidden_states(resdir + 'hidden_list.h5')
-    #hidden_list  # a list of dicts
-    h_dict = hidden_list[-1]
-    # the last layer of hidden states
-    
-    adt = pp.make_adata(h_dict['cell'], obs=dpair.obs, assparse=False, ignore_index=True)
-    gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    
-    # UMAP of cell embeddings
-    sc.set_figure_params(dpi_save=200)
-
-    sc.pp.neighbors(adt, n_neighbors=15, metric='cosine', use_rep='X')
-    sc.tl.umap(adt)
-    # sc.pl.umap(adt, color=['dataset', 'celltype'], ncols=1)
-
-    ftype = ['.svg', ''][1]
-    sc.pl.umap(adt, color='dataset', save=f'-dataset{ftype}')
-    sc.pl.umap(adt, color='celltype', save=f'-ctype{ftype}')
-    
-    obs_umap = adt.obsm['X_umap']
-    obs['UMAP1'] = obs_umap[:, 0]
-    obs['UMAP2'] = obs_umap[:, 1]
-    obs.to_csv(resdir + 'obs.csv')
-    adt.write(resdir + 'adt_hidden_cell.h5ad')
-    
-    adata1.obsm['X_umap'] = obs_umap[obs_ids1]
-    adata2.obsm['X_umap'] = obs_umap[obs_ids2]
-    
-    # Umap of genes
-    sc.pp.neighbors(gadt, n_neighbors=15, metric='cosine', use_rep='X')
-
-    # gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    sc.tl.umap(gadt)
-    sc.pl.umap(gadt, color='dataset', )
-    
-    # joint gene module extraction
-    sc.tl.leiden(gadt, resolution=.8, key_added='module')
-    sc.pl.umap(gadt, color='module', ncols=1, palette='tab20b')
-    
-    # gadt.obs_names = gadt.obs_names.astype(str)
-    gadt1, gadt2 = pp.bisplit_adata(gadt, 'dataset', dsnames[0], reset_index_by='name')
-
-    color_by = 'module'
-    palette = 'tab20b'
-    sc.pl.umap(gadt1, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-    sc.pl.umap(gadt2, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-           
-    df_var_links = came.weight_linked_vars(
-    gadt.X, dpair._vv_adj, names=dpair.get_vnode_names(),
-    matric='cosine', index_names=dsnames,)
-
-    gadt1.write(resdir + 'adt_hidden_gene1.h5ad')
-    gadt2.write(resdir + 'adt_hidden_gene2.h5ad')
-    
-    # Gene-expression-profiles (for each cell type) on gene UMAP
-    # averaged expressions
-    avg_expr1 = pp.group_mean_adata(adatas[0], groupby=key_class1,features=dpair.vnode_names1, use_raw=True)
-    avg_expr2 = pp.group_mean_adata(adatas[1], groupby=key_class2 if key_class2 else 'predicted',features=dpair.vnode_names2, use_raw=True)
-    
-    plkwds = dict(cmap='RdYlBu_r', vmax=2.5, vmin=-1.5, do_zscore=True,
-              axscale=3, ncols=5, with_cbar=True)
-
-    fig1, axs1 = pl.adata_embed_with_values(gadt1, avg_expr1, fp=figdir + f'umap_exprAvgs-{dsnames[0]}-all.png', **plkwds)
-    fig2, axs2 = pl.adata_embed_with_values(gadt2, avg_expr2, fp=figdir + f'umap_exprAvgs-{dsnames[1]}-all.png', **plkwds)
-    
-    # Abstracted graph
-    norm_ov = ['max', 'zs', None][1]
-    cut_ov = 0.
-
-    groupby_var = 'module'
-    obs_labels1, obs_labels2 = adt.obs['celltype'][dpair.obs_ids1], \
-                           adt.obs['celltype'][dpair.obs_ids2]
-    var_labels1, var_labels2 = gadt1.obs[groupby_var], gadt2.obs[groupby_var]
-
-    sp1, sp2 = 'mouse', 'human'
-    g = came.make_abstracted_graph(
-        obs_labels1, obs_labels2,
-        var_labels1, var_labels2,
-        avg_expr1, avg_expr2,
-        df_var_links,
-        tags_obs=(f'{sp1} ', f'{sp2} '),
-        tags_var=(f'{sp1} module ', f'{sp2} module '),
-        cut_ov=cut_ov,
-        norm_mtd_ov=norm_ov,)
-        
-    #visualization
-    fp_abs = figdir + f'abstracted_graph-{groupby_var}-cut{cut_ov}-{norm_ov}.pdf'
-    ax = pl.plot_multipartite_graph(g, edge_scale=10, figsize=(9, 7.5), alpha=0.5, fp=fp_abs)  # nodelist=nodelist,
-
-    ax.figure
-    
-    came.save_pickle(g, resdir + 'abs_graph.pickle')
-    
-    
-    
-    
-
diff --git a/run_came/brain_voxel_sample_mouse_human_no_threshold_dilution.py b/run_came/brain_voxel_sample_mouse_human_no_threshold_dilution.py
deleted file mode 100644
index 099ef09..0000000
--- a/run_came/brain_voxel_sample_mouse_human_no_threshold_dilution.py
+++ /dev/null
@@ -1,288 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Wed May  5 20:08:52 2022
-
-@author: Biao Zhang
-"""
-import came
-from came import pipeline, pp, pl
-
-import logging
-import os
-import sys
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-from matplotlib import pyplot as plt  # optional
-import seaborn as sns  # optional
-
-import scanpy as sc
-from scipy import sparse
-
-import networkx as nx
-import torch
-from heco_utils import threshold_quantile
-
-
-try:
-    import matplotlib as mpl
-    mpl.use('agg')
-except Exception as e:
-    print(f"An error occurred when setting matplotlib backend ({e})")
-
-if __name__ == '__main__':
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s %(filename)s-%(lineno)d-%(funcName)s(): '
-               '%(levelname)s\n %(message)s')
-
-    #came.__test1__(10, batch_size=None, reverse=False)
-    #came.__test2__(10, batch_size=2048)
-    dsnames = ('Baron_mouse', 'Baron_human')  # the dataset names, set by user
-    dsn1, dsn2 = dsnames
-
-    path_rawdata1 = '../Brain_ST_human_mouse/data/mouse_brain_region_67_sparse_no_threshold.h5ad'
-    path_rawdata2 = './brain_human_mouse/human_brain_region_88_sparse.h5ad'
-    
-    path_varmap = './came/sample_data/gene_matches_mouse2human.csv'
-    path_varmap_1v1 = './came/sample_data/gene_matches_1v1_mouse2human.csv'
-    
-    # ========= load data =========
-    df_varmap = pd.read_csv(path_varmap)
-    df_varmap_1v1 = pd.read_csv(path_varmap_1v1) if path_varmap_1v1 else came.pp.take_1v1_matches(df_varmap)
-
-    adata_raw1 = sc.read_h5ad(path_rawdata1)
-    adata_raw2 = sc.read_h5ad(path_rawdata2)
-    adatas = [adata_raw1, adata_raw2]
-    
-    key_class1 = 'region_name'  # set by user
-    key_class2 = 'region_name'  # set by user
-    
-    ROOT = '/home1/zhangbiao/CAME/brain_mouse_human_no_threshold_sparse/'
-    time_tag = came.make_nowtime_tag()
-    resdir = ROOT + f'{dsn1}-{dsn2}-{time_tag}/'  # set by user
-    figdir = resdir + '/figs/'
-    came.check_dirs(figdir)  # check and make the directory
-    
-    sc.pp.filter_genes(adata_raw1, min_cells=1)
-    sc.pp.filter_genes(adata_raw2, min_cells=1)
-    
-    # Inspect classes
-    if key_class2 is not None:
-        group_counts_ori = pd.concat([
-            pd.value_counts(adata_raw1.obs[key_class1]),
-            pd.value_counts(adata_raw2.obs[key_class2]),
-        ], axis=1, keys=dsnames)
-    else:
-        group_counts_ori = pd.value_counts(adata_raw1.obs[key_class1])
-        
-    print(group_counts_ori)
-        
-        
-    # The numer of training epochs
-    # (a recommended setting is 200-400 for whole-graph training, and 80-200 for sub-graph training)
-    n_epochs = 300
-
-    # The training batch size
-    # When the GPU memory is limited, set 4096 or more if possible.
-    batch_size = 4096
-
-    # The number of epochs to skip for checkpoint backup
-    n_pass = 100
-
-    # Whether to use the single-cell networks
-    use_scnets = True
-
-    # The number of top DEGs to take as the node-features of each cells.
-    # You set it 70-100 for distant species pairs.
-    ntop_deg = 70
-
-    # The number of top DEGs to take as the graph nodes, which can be directly displayed on the UMAP plot.
-    ntop_deg_nodes = 50
-    # The source of the node genes; use both DEGs and HVGs by default
-    node_source = 'deg,hvg'
-
-    # Whether to take into account the non-1v1 variables as the node features.
-    keep_non1v1_feats = True
-    
-    came_inputs, (adata1, adata2) = pipeline.preprocess_unaligned(
-        adatas,
-        key_class=key_class1,
-        use_scnets=use_scnets,
-        ntop_deg=ntop_deg,
-        ntop_deg_nodes=ntop_deg_nodes,
-        node_source=node_source,
-        )
-
-    print('came_results:', came_inputs)
-
-    print('Mouse features: ', came_inputs['adatas'][0])
-    X = came_inputs['adatas'][0].X.toarray()
-    print(type(X))
-    came_inputs['adatas'][0].X = sparse.csr_matrix(threshold_quantile(X, quantile_gene=0.8, quantile_sample=0.99))
-
-    print('Human features: ', came_inputs['adatas'][1])
-    X = came_inputs['adatas'][1].X.toarray()
-    print(type(X))
-    came_inputs['adatas'][1].X = sparse.csr_matrix(threshold_quantile(X, quantile_gene=0.8, quantile_sample=0.99))
-
-
-
-
-
-    outputs = pipeline.main_for_unaligned(
-        **came_inputs,
-        df_varmap=df_varmap,
-        df_varmap_1v1=df_varmap_1v1,
-        dataset_names=dsnames,
-        key_class1=key_class1,
-        key_class2=key_class2,
-        do_normalize=True,
-        keep_non1v1_feats=keep_non1v1_feats,
-        n_epochs=n_epochs,
-        resdir=resdir,
-        n_pass=n_pass,
-        batch_size=batch_size,
-        plot_results=True,
-        device_id = '1',
-        )
-        
-    dpair = outputs['dpair']
-    trainer = outputs['trainer']
-    h_dict = outputs['h_dict']
-    out_cell = outputs['out_cell']
-    predictor = outputs['predictor']
-
-
-    obs_ids1, obs_ids2 = dpair.obs_ids1, dpair.obs_ids2
-    obs = dpair.obs
-    classes = predictor.classes
-    
-    # plot figures
-    y_true = obs['celltype'][obs_ids2].values
-    y_pred = obs['predicted'][obs_ids2].values
-    print(y_true.shape)
-    print(y_pred.shape)
-    print(type(y_true[0]))
-    print(type(y_pred[0]))
-
-    #ax, contmat = pl.plot_contingency_mat(y_true, y_pred, norm_axis=1, order_rows=False, order_cols=False,)
-    #pl._save_with_adjust(ax.figure, figdir / 'contingency_mat.png')
-    #ax.figure
-    
-    name_label = 'celltype'
-    cols_anno = ['celltype', 'predicted'][:]
-    df_probs = obs[list(classes)]
-
-
-    gs = pl.wrapper_heatmap_scores(df_probs.iloc[obs_ids2], obs.iloc[obs_ids2], ignore_index=True,
-    col_label='celltype', col_pred='predicted',
-    n_subsample=50,  # sampling 50 cells for each group
-    cmap_heat='magma_r',
-    fp=figdir + 'heatmap_probas.pdf')
-
-    gs.figure
-    
-    
-    
-    # further analysis
-    hidden_list = came.load_hidden_states(resdir + 'hidden_list.h5')
-    #hidden_list  # a list of dicts
-    h_dict = hidden_list[-1]
-    # the last layer of hidden states
-    
-    adt = pp.make_adata(h_dict['cell'], obs=dpair.obs, assparse=False, ignore_index=True)
-    gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    
-    # UMAP of cell embeddings
-    sc.set_figure_params(dpi_save=200)
-
-    sc.pp.neighbors(adt, n_neighbors=15, metric='cosine', use_rep='X')
-    sc.tl.umap(adt)
-    # sc.pl.umap(adt, color=['dataset', 'celltype'], ncols=1)
-
-    ftype = ['.svg', ''][1]
-    sc.pl.umap(adt, color='dataset', save=f'-dataset{ftype}')
-    sc.pl.umap(adt, color='celltype', save=f'-ctype{ftype}')
-    
-    obs_umap = adt.obsm['X_umap']
-    obs['UMAP1'] = obs_umap[:, 0]
-    obs['UMAP2'] = obs_umap[:, 1]
-    obs.to_csv(resdir + 'obs.csv')
-    adt.write(resdir + 'adt_hidden_cell.h5ad')
-    
-    adata1.obsm['X_umap'] = obs_umap[obs_ids1]
-    adata2.obsm['X_umap'] = obs_umap[obs_ids2]
-    
-    # Umap of genes
-    sc.pp.neighbors(gadt, n_neighbors=15, metric='cosine', use_rep='X')
-
-    # gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    sc.tl.umap(gadt)
-    sc.pl.umap(gadt, color='dataset', )
-    
-    # joint gene module extraction
-    sc.tl.leiden(gadt, resolution=.8, key_added='module')
-    sc.pl.umap(gadt, color='module', ncols=1, palette='tab20b')
-    
-    # gadt.obs_names = gadt.obs_names.astype(str)
-    gadt1, gadt2 = pp.bisplit_adata(gadt, 'dataset', dsnames[0], reset_index_by='name')
-
-    color_by = 'module'
-    palette = 'tab20b'
-    sc.pl.umap(gadt1, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-    sc.pl.umap(gadt2, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-           
-    df_var_links = came.weight_linked_vars(
-    gadt.X, dpair._vv_adj, names=dpair.get_vnode_names(),
-    matric='cosine', index_names=dsnames,)
-
-    gadt1.write(resdir + 'adt_hidden_gene1.h5ad')
-    gadt2.write(resdir + 'adt_hidden_gene2.h5ad')
-    
-    # Gene-expression-profiles (for each cell type) on gene UMAP
-    # averaged expressions
-    avg_expr1 = pp.group_mean_adata(adatas[0], groupby=key_class1,features=dpair.vnode_names1, use_raw=True)
-    avg_expr2 = pp.group_mean_adata(adatas[1], groupby=key_class2 if key_class2 else 'predicted',features=dpair.vnode_names2, use_raw=True)
-    
-    plkwds = dict(cmap='RdYlBu_r', vmax=2.5, vmin=-1.5, do_zscore=True,
-              axscale=3, ncols=5, with_cbar=True)
-
-    fig1, axs1 = pl.adata_embed_with_values(gadt1, avg_expr1, fp=figdir + f'umap_exprAvgs-{dsnames[0]}-all.png', **plkwds)
-    fig2, axs2 = pl.adata_embed_with_values(gadt2, avg_expr2, fp=figdir + f'umap_exprAvgs-{dsnames[1]}-all.png', **plkwds)
-    
-    # Abstracted graph
-    norm_ov = ['max', 'zs', None][1]
-    cut_ov = 0.
-
-    groupby_var = 'module'
-    obs_labels1, obs_labels2 = adt.obs['celltype'][dpair.obs_ids1], \
-                           adt.obs['celltype'][dpair.obs_ids2]
-    var_labels1, var_labels2 = gadt1.obs[groupby_var], gadt2.obs[groupby_var]
-
-    sp1, sp2 = 'mouse', 'human'
-    g = came.make_abstracted_graph(
-        obs_labels1, obs_labels2,
-        var_labels1, var_labels2,
-        avg_expr1, avg_expr2,
-        df_var_links,
-        tags_obs=(f'{sp1} ', f'{sp2} '),
-        tags_var=(f'{sp1} module ', f'{sp2} module '),
-        cut_ov=cut_ov,
-        norm_mtd_ov=norm_ov,)
-        
-    #visualization
-    fp_abs = figdir + f'abstracted_graph-{groupby_var}-cut{cut_ov}-{norm_ov}.pdf'
-    ax = pl.plot_multipartite_graph(g, edge_scale=10, figsize=(9, 7.5), alpha=0.5, fp=fp_abs)  # nodelist=nodelist,
-
-    ax.figure
-    
-    came.save_pickle(g, resdir + 'abs_graph.pickle')
-
-    
-
diff --git a/run_came/brain_voxel_sample_mouse_human_no_threshold_sagittal.py b/run_came/brain_voxel_sample_mouse_human_no_threshold_sagittal.py
deleted file mode 100644
index 5ce6f9a..0000000
--- a/run_came/brain_voxel_sample_mouse_human_no_threshold_sagittal.py
+++ /dev/null
@@ -1,273 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Wed May  5 20:08:52 2021
-
-@author: Xingyan Liu
-"""
-import came
-from came import pipeline, pp, pl
-
-import logging
-import os
-import sys
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-from matplotlib import pyplot as plt  # optional
-import seaborn as sns  # optional
-
-import scanpy as sc
-from scipy import sparse
-
-import networkx as nx
-import torch
-
-
-try:
-    import matplotlib as mpl
-    mpl.use('agg')
-except Exception as e:
-    print(f"An error occurred when setting matplotlib backend ({e})")
-
-if __name__ == '__main__':
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s %(filename)s-%(lineno)d-%(funcName)s(): '
-               '%(levelname)s\n %(message)s')
-
-    #came.__test1__(10, batch_size=None, reverse=False)
-    #came.__test2__(10, batch_size=2048)
-    dsnames = ('Baron_mouse', 'Baron_human')  # the dataset names, set by user
-    dsn1, dsn2 = dsnames
-
-    path_rawdata1 = '../Brain_ST_human_mouse/data/mouse_brain_region_67_sagittal.h5ad'
-    path_rawdata2 = './brain_human_mouse/human_brain_region_88_sparse.h5ad'
-    
-    path_varmap = './came/sample_data/gene_matches_mouse2human.csv'
-    path_varmap_1v1 = './came/sample_data/gene_matches_1v1_mouse2human.csv'
-    
-    # ========= load data =========
-    df_varmap = pd.read_csv(path_varmap)
-    df_varmap_1v1 = pd.read_csv(path_varmap_1v1) if path_varmap_1v1 else came.pp.take_1v1_matches(df_varmap)
-
-    adata_raw1 = sc.read_h5ad(path_rawdata1)
-    adata_raw2 = sc.read_h5ad(path_rawdata2)
-    adatas = [adata_raw1, adata_raw2]
-    
-    key_class1 = 'region_name'  # set by user
-    key_class2 = 'region_name'  # set by user
-    
-    ROOT = '/home1/zhangbiao/CAME/brain_mouse_human_sagittal_1000/'
-    time_tag = came.make_nowtime_tag()
-    resdir = ROOT +  f'{dsn1}-{dsn2}-{time_tag}/'  # set by user
-    figdir = resdir + '/figs/'
-    came.check_dirs(figdir)  # check and make the directory
-    
-    sc.pp.filter_genes(adata_raw1, min_cells=1)
-    sc.pp.filter_genes(adata_raw2, min_cells=1)
-    
-    # Inspect classes
-    if key_class2 is not None:
-        group_counts_ori = pd.concat([
-            pd.value_counts(adata_raw1.obs[key_class1]),
-            pd.value_counts(adata_raw2.obs[key_class2]),
-        ], axis=1, keys=dsnames)
-    else:
-        group_counts_ori = pd.value_counts(adata_raw1.obs[key_class1])
-        
-    print(group_counts_ori)
-        
-        
-    # The numer of training epochs
-    # (a recommended setting is 200-400 for whole-graph training, and 80-200 for sub-graph training)
-    n_epochs = 400
-
-    # The training batch size
-    # When the GPU memory is limited, set 4096 or more if possible.
-    batch_size = 4096
-
-    # The number of epochs to skip for checkpoint backup
-    n_pass = 100
-
-    # Whether to use the single-cell networks
-    use_scnets = True
-
-    # The number of top DEGs to take as the node-features of each cells.
-    # You set it 70-100 for distant species pairs.
-    ntop_deg = 70
-
-    # The number of top DEGs to take as the graph nodes, which can be directly displayed on the UMAP plot.
-    ntop_deg_nodes = 50
-    # The source of the node genes; use both DEGs and HVGs by default
-    node_source = 'deg,hvg'
-
-    # Whether to take into account the non-1v1 variables as the node features.
-    keep_non1v1_feats = True
-    
-    came_inputs, (adata1, adata2) = pipeline.preprocess_unaligned(
-        adatas,
-        key_class=key_class1,
-        use_scnets=use_scnets,
-        ntop_deg=ntop_deg,
-        ntop_deg_nodes=ntop_deg_nodes,
-        node_source=node_source,
-        )
-
-    outputs = pipeline.main_for_unaligned(
-        **came_inputs,
-        df_varmap=df_varmap,
-        df_varmap_1v1=df_varmap_1v1,
-        dataset_names=dsnames,
-        key_class1=key_class1,
-        key_class2=key_class2,
-        do_normalize=True,
-        keep_non1v1_feats=keep_non1v1_feats,
-        n_epochs=n_epochs,
-        resdir=resdir,
-        n_pass=n_pass,
-        batch_size=batch_size,
-        plot_results=True,
-        device_id = '1',
-        )
-        
-    dpair = outputs['dpair']
-    trainer = outputs['trainer']
-    h_dict = outputs['h_dict']
-    out_cell = outputs['out_cell']
-    predictor = outputs['predictor']
-
-
-    obs_ids1, obs_ids2 = dpair.obs_ids1, dpair.obs_ids2
-    obs = dpair.obs
-    classes = predictor.classes
-    
-    # plot figures
-    y_true = obs['celltype'][obs_ids2].values
-    y_pred = obs['predicted'][obs_ids2].values
-    print(y_true.shape)
-    print(y_pred.shape)
-    print(type(y_true[0]))
-    print(type(y_pred[0]))
-
-    #ax, contmat = pl.plot_contingency_mat(y_true, y_pred, norm_axis=1, order_rows=False, order_cols=False,)
-    #pl._save_with_adjust(ax.figure, figdir / 'contingency_mat.png')
-    #ax.figure
-    
-    name_label = 'celltype'
-    cols_anno = ['celltype', 'predicted'][:]
-    df_probs = obs[list(classes)]
-
-
-    gs = pl.wrapper_heatmap_scores(df_probs.iloc[obs_ids2], obs.iloc[obs_ids2], ignore_index=True,
-    col_label='celltype', col_pred='predicted',
-    n_subsample=50,  # sampling 50 cells for each group
-    cmap_heat='magma_r',
-    fp=figdir + 'heatmap_probas.pdf')
-
-    gs.figure
-    
-    
-    
-    # further analysis
-    hidden_list = came.load_hidden_states(resdir + 'hidden_list.h5')
-    #hidden_list  # a list of dicts
-    h_dict = hidden_list[-1]
-    # the last layer of hidden states
-    
-    adt = pp.make_adata(h_dict['cell'], obs=dpair.obs, assparse=False, ignore_index=True)
-    gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    
-    # UMAP of cell embeddings
-    sc.set_figure_params(dpi_save=200)
-
-    sc.pp.neighbors(adt, n_neighbors=15, metric='cosine', use_rep='X')
-    sc.tl.umap(adt)
-    # sc.pl.umap(adt, color=['dataset', 'celltype'], ncols=1)
-
-    ftype = ['.svg', ''][1]
-    sc.pl.umap(adt, color='dataset', save=f'-dataset{ftype}')
-    sc.pl.umap(adt, color='celltype', save=f'-ctype{ftype}')
-    
-    obs_umap = adt.obsm['X_umap']
-    obs['UMAP1'] = obs_umap[:, 0]
-    obs['UMAP2'] = obs_umap[:, 1]
-    obs.to_csv(resdir + 'obs.csv')
-    adt.write(resdir + 'adt_hidden_cell.h5ad')
-    
-    adata1.obsm['X_umap'] = obs_umap[obs_ids1]
-    adata2.obsm['X_umap'] = obs_umap[obs_ids2]
-    
-    # Umap of genes
-    sc.pp.neighbors(gadt, n_neighbors=15, metric='cosine', use_rep='X')
-
-    # gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    sc.tl.umap(gadt)
-    sc.pl.umap(gadt, color='dataset', )
-    
-    # joint gene module extraction
-    sc.tl.leiden(gadt, resolution=.8, key_added='module')
-    sc.pl.umap(gadt, color='module', ncols=1, palette='tab20b')
-    
-    # gadt.obs_names = gadt.obs_names.astype(str)
-    gadt1, gadt2 = pp.bisplit_adata(gadt, 'dataset', dsnames[0], reset_index_by='name')
-
-    color_by = 'module'
-    palette = 'tab20b'
-    sc.pl.umap(gadt1, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-    sc.pl.umap(gadt2, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-           
-    df_var_links = came.weight_linked_vars(
-    gadt.X, dpair._vv_adj, names=dpair.get_vnode_names(),
-    matric='cosine', index_names=dsnames,)
-
-    gadt1.write(resdir + 'adt_hidden_gene1.h5ad')
-    gadt2.write(resdir + 'adt_hidden_gene2.h5ad')
-    
-    # Gene-expression-profiles (for each cell type) on gene UMAP
-    # averaged expressions
-    avg_expr1 = pp.group_mean_adata(adatas[0], groupby=key_class1,features=dpair.vnode_names1, use_raw=True)
-    avg_expr2 = pp.group_mean_adata(adatas[1], groupby=key_class2 if key_class2 else 'predicted',features=dpair.vnode_names2, use_raw=True)
-    
-    plkwds = dict(cmap='RdYlBu_r', vmax=2.5, vmin=-1.5, do_zscore=True,
-              axscale=3, ncols=5, with_cbar=True)
-
-    fig1, axs1 = pl.adata_embed_with_values(gadt1, avg_expr1, fp=figdir + f'umap_exprAvgs-{dsnames[0]}-all.png', **plkwds)
-    fig2, axs2 = pl.adata_embed_with_values(gadt2, avg_expr2, fp=figdir + f'umap_exprAvgs-{dsnames[1]}-all.png', **plkwds)
-    
-    # Abstracted graph
-    norm_ov = ['max', 'zs', None][1]
-    cut_ov = 0.
-
-    groupby_var = 'module'
-    obs_labels1, obs_labels2 = adt.obs['celltype'][dpair.obs_ids1], \
-                           adt.obs['celltype'][dpair.obs_ids2]
-    var_labels1, var_labels2 = gadt1.obs[groupby_var], gadt2.obs[groupby_var]
-
-    sp1, sp2 = 'mouse', 'human'
-    g = came.make_abstracted_graph(
-        obs_labels1, obs_labels2,
-        var_labels1, var_labels2,
-        avg_expr1, avg_expr2,
-        df_var_links,
-        tags_obs=(f'{sp1} ', f'{sp2} '),
-        tags_var=(f'{sp1} module ', f'{sp2} module '),
-        cut_ov=cut_ov,
-        norm_mtd_ov=norm_ov,)
-        
-    #visualization
-    fp_abs = figdir + f'abstracted_graph-{groupby_var}-cut{cut_ov}-{norm_ov}.pdf'
-    ax = pl.plot_multipartite_graph(g, edge_scale=10, figsize=(9, 7.5), alpha=0.5, fp=fp_abs)  # nodelist=nodelist,
-
-    ax.figure
-    
-    came.save_pickle(g, resdir + 'abs_graph.pickle')
-    
-    
-    
-    
-
diff --git a/run_came/pca_script_human.py b/run_came/pca_script_human.py
deleted file mode 100644
index 02cdba3..0000000
--- a/run_came/pca_script_human.py
+++ /dev/null
@@ -1,85 +0,0 @@
-
-
-import scanpy as sc, anndata as ad
-import numpy as np
-import pandas as pd
-
-
-if __name__ == "__main__":
-
-    
-    path_rawdata_human = "./brain_human_mouse/human_brain_region_88_sparse.h5ad"
-    adata = sc.read_h5ad(path_rawdata_human)
-    
-    sc.settings.verbosity = 3
-    #sc.settings.set_figure_params(dpi=80)
-
-    
-    #sc.tl.leiden(adata, key_added = "leiden_1.0") # default resolution in 1.0
-    #sc.tl.leiden(adata, resolution = 0.6, key_added = "leiden_0.6")
-    #sc.tl.leiden(adata, resolution = 0.4, key_added = "leiden_0.4")
-    #sc.tl.leiden(adata, resolution = 1.4, key_added = "leiden_1.4")
-    
-    #sc.pl.umap(adata, color=['leiden_0.4', 'leiden_0.6', 'leiden_1.0','leiden_1.4'])
-    
-    #sc.tl.dendrogram(adata, groupby = "leiden_0.6")
-    #sc.pl.dendrogram(adata, groupby = "leiden_0.6")
-    
-    
-    #sc.pp.log1p(adata)
-    # store normalized counts in the raw slot, 
-    # we will subset adata.X for variable genes, but want to keep all genes matrix as well.
-    #adata.raw = adata
-    #adata.X =  pd.df(adata.X).fillna(0)
-    #sc.pp.highly_variable_genes(adata, min_mean=0.001, max_mean=3, min_disp=0.1)
-    #print("Highly variable genes: %d"%sum(adata.var.highly_variable))
-
-    #plot variable genes
-    #sc.pl.highly_variable_genes(adata)
-
-    # subset for variable genes in the dataset
-    #adata = adata[:, adata.var['highly_variable']]
-    
-    sc.tl.pca(adata, svd_solver='arpack')
-    sc.pl.pca(adata, color='region_name', components = ['1,2','3,4','5,6','7,8'], ncols=2, return_fig=True).savefig('./figs/human_88/pca.png')
-    
-    
-
-    
-    
-    
-    path_rawdata_human = "./brain_human_mouse_16_11/human_brain_region_16_sparse.h5ad"
-    adata = sc.read_h5ad(path_rawdata_human)
-    
-    sc.settings.verbosity = 3
-    #sc.settings.set_figure_params(dpi=80)
-
-    
-    #sc.tl.leiden(adata, key_added = "leiden_1.0") # default resolution in 1.0
-    #sc.tl.leiden(adata, resolution = 0.6, key_added = "leiden_0.6")
-    #sc.tl.leiden(adata, resolution = 0.4, key_added = "leiden_0.4")
-    #sc.tl.leiden(adata, resolution = 1.4, key_added = "leiden_1.4")
-    
-    #sc.pl.umap(adata, color=['leiden_0.4', 'leiden_0.6', 'leiden_1.0','leiden_1.4'])
-    
-    #sc.tl.dendrogram(adata, groupby = "leiden_0.6")
-    #sc.pl.dendrogram(adata, groupby = "leiden_0.6")
-    
-    
-    #sc.pp.log1p(adata)
-    # store normalized counts in the raw slot, 
-    # we will subset adata.X for variable genes, but want to keep all genes matrix as well.
-    #adata.raw = adata
-    #adata.X =  pd.df(adata.X).fillna(0)
-    #sc.pp.highly_variable_genes(adata, min_mean=0.001, max_mean=3, min_disp=0.1)
-    #print("Highly variable genes: %d"%sum(adata.var.highly_variable))
-
-    #plot variable genes
-    #sc.pl.highly_variable_genes(adata)
-
-    # subset for variable genes in the dataset
-    #adata = adata[:, adata.var['highly_variable']]
-
-    sc.tl.pca(adata, svd_solver='arpack')
-    sc.pl.pca(adata, color='region_name', components = ['1,2','3,4','5,6','7,8'], ncols=2, return_fig=True).savefig('./figs/human_16/pca.png')
-    
diff --git a/run_came/pca_script_mouse.py b/run_came/pca_script_mouse.py
deleted file mode 100644
index 622b5b7..0000000
--- a/run_came/pca_script_mouse.py
+++ /dev/null
@@ -1,81 +0,0 @@
-
-
-import scanpy as sc, anndata as ad
-import numpy as np
-import pandas as pd
-
-
-if __name__ == "__main__":
-
-    
-    path_rawdata_mouse = "./brain_human_mouse/mouse_brain_region_67_sparse.h5ad"
-    adata = sc.read_h5ad(path_rawdata_mouse)
-    
-    sc.settings.verbosity = 3
-    #sc.settings.set_figure_params(dpi=80)
-
-    
-    #sc.tl.leiden(adata, key_added = "leiden_1.0") # default resolution in 1.0
-    #sc.tl.leiden(adata, resolution = 0.6, key_added = "leiden_0.6")
-    #sc.tl.leiden(adata, resolution = 0.4, key_added = "leiden_0.4")
-    #sc.tl.leiden(adata, resolution = 1.4, key_added = "leiden_1.4")
-    
-    #sc.pl.umap(adata, color=['leiden_0.4', 'leiden_0.6', 'leiden_1.0','leiden_1.4'])
-    
-    #sc.tl.dendrogram(adata, groupby = "leiden_0.6")
-    #sc.pl.dendrogram(adata, groupby = "leiden_0.6")
-    
-    
-    #sc.pp.log1p(adata)
-    # store normalized counts in the raw slot, 
-    # we will subset adata.X for variable genes, but want to keep all genes matrix as well.
-    #adata.raw = adata
-    #adata.X =  pd.df(adata.X).fillna(0)
-    #sc.pp.highly_variable_genes(adata, min_mean=0.001, max_mean=3, min_disp=0.1)
-    #print("Highly variable genes: %d"%sum(adata.var.highly_variable))
-
-    #plot variable genes
-    #sc.pl.highly_variable_genes(adata)
-
-    # subset for variable genes in the dataset
-    #adata = adata[:, adata.var['highly_variable']]
-    
-    sc.tl.pca(adata, svd_solver='arpack')
-    sc.pl.pca(adata, color='region_name', components = ['1,2','3,4','5,6','7,8'], ncols=2, return_fig=True).savefig('./figs/mouse_67/pca.png')
-    
-    
-    path_rawdata_mouse = "./brain_human_mouse_16_11/mouse_brain_region_11_sparse.h5ad"
-    adata = sc.read_h5ad(path_rawdata_mouse)
-    
-    sc.settings.verbosity = 3
-    #sc.settings.set_figure_params(dpi=80)
-
-    
-    #sc.tl.leiden(adata, key_added = "leiden_1.0") # default resolution in 1.0
-    #sc.tl.leiden(adata, resolution = 0.6, key_added = "leiden_0.6")
-    #sc.tl.leiden(adata, resolution = 0.4, key_added = "leiden_0.4")
-    #sc.tl.leiden(adata, resolution = 1.4, key_added = "leiden_1.4")
-    
-    #sc.pl.umap(adata, color=['leiden_0.4', 'leiden_0.6', 'leiden_1.0','leiden_1.4'])
-    
-    #sc.tl.dendrogram(adata, groupby = "leiden_0.6")
-    #sc.pl.dendrogram(adata, groupby = "leiden_0.6")
-    
-    
-    #sc.pp.log1p(adata)
-    # store normalized counts in the raw slot, 
-    # we will subset adata.X for variable genes, but want to keep all genes matrix as well.
-    #adata.raw = adata
-    #adata.X =  pd.df(adata.X).fillna(0)
-    #sc.pp.highly_variable_genes(adata, min_mean=0.001, max_mean=3, min_disp=0.1)
-    #print("Highly variable genes: %d"%sum(adata.var.highly_variable))
-
-    #plot variable genes
-    #sc.pl.highly_variable_genes(adata)
-
-    # subset for variable genes in the dataset
-    #adata = adata[:, adata.var['highly_variable']]
-
-    sc.tl.pca(adata, svd_solver='arpack')
-    sc.pl.pca(adata, color='region_name', components = ['1,2','3,4','5,6','7,8'], ncols=2, return_fig=True).savefig('./figs/mouse_11/pca.png')
-    
diff --git a/run_came/run_came_plot.py b/run_came/run_came_plot.py
deleted file mode 100644
index 4ee3915..0000000
--- a/run_came/run_came_plot.py
+++ /dev/null
@@ -1,281 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Wed May  5 20:08:52 2021
-
-@author: Xingyan Liu
-"""
-import came
-from came import pipeline, pp, pl
-
-import logging
-import os
-import sys
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-from matplotlib import pyplot as plt  # optional
-import seaborn as sns  # optional
-
-import scanpy as sc
-from scipy import sparse
-
-import networkx as nx
-import torch
-
-
-try:
-    import matplotlib as mpl
-    mpl.use('agg')
-except Exception as e:
-    print(f"An error occurred when setting matplotlib backend ({e})")
-
-def run_came(path_rawdata1 = '../Brain_ST_human_mouse/data/mouse_brain_region_67_sagittal.h5ad',
-             path_rawdata2 = './brain_human_mouse/human_brain_region_88_sparse.h5ad',
-             ROOT_path = '/home1/zhangbiao/CAME/brain_mouse_human_sagittal/'):
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s %(filename)s-%(lineno)d-%(funcName)s(): '
-               '%(levelname)s\n %(message)s')
-
-    #came.__test1__(10, batch_size=None, reverse=False)
-    #came.__test2__(10, batch_size=2048)
-    dsnames = ('Baron_mouse', 'Baron_human')  # the dataset names, set by user
-    dsn1, dsn2 = dsnames
-
-    path_rawdata1 = path_rawdata1
-    path_rawdata2 = path_rawdata2
-    
-    path_varmap = './came/sample_data/gene_matches_mouse2human.csv'
-    path_varmap_1v1 = './came/sample_data/gene_matches_1v1_mouse2human.csv'
-    
-    # ========= load data =========
-    df_varmap = pd.read_csv(path_varmap)
-    df_varmap_1v1 = pd.read_csv(path_varmap_1v1) if path_varmap_1v1 else came.pp.take_1v1_matches(df_varmap)
-
-    adata_raw1 = sc.read_h5ad(path_rawdata1)
-    adata_raw2 = sc.read_h5ad(path_rawdata2)
-    adatas = [adata_raw1, adata_raw2]
-    
-    key_class1 = 'region_name'  # set by user
-    key_class2 = 'region_name'  # set by user
-    
-    ROOT = ROOT_path
-    time_tag = came.make_nowtime_tag()
-    resdir = ROOT +  f'{dsn1}-{dsn2}-{time_tag}/'  # set by user
-    figdir = resdir + '/figs/'
-    came.check_dirs(figdir)  # check and make the directory
-    
-    sc.pp.filter_genes(adata_raw1, min_cells=1)
-    sc.pp.filter_genes(adata_raw2, min_cells=1)
-    
-    # Inspect classes
-    if key_class2 is not None:
-        group_counts_ori = pd.concat([
-            pd.value_counts(adata_raw1.obs[key_class1]),
-            pd.value_counts(adata_raw2.obs[key_class2]),
-        ], axis=1, keys=dsnames)
-    else:
-        group_counts_ori = pd.value_counts(adata_raw1.obs[key_class1])
-        
-    print(group_counts_ori)
-        
-        
-    # The numer of training epochs
-    # (a recommended setting is 200-400 for whole-graph training, and 80-200 for sub-graph training)
-    n_epochs = 400
-
-    # The training batch size
-    # When the GPU memory is limited, set 4096 or more if possible.
-    batch_size = 4096
-
-    # The number of epochs to skip for checkpoint backup
-    n_pass = 100
-
-    # Whether to use the single-cell networks
-    use_scnets = True
-
-    # The number of top DEGs to take as the node-features of each cells.
-    # You set it 70-100 for distant species pairs.
-    ntop_deg = 70
-
-    # The number of top DEGs to take as the graph nodes, which can be directly displayed on the UMAP plot.
-    ntop_deg_nodes = 50
-    # The source of the node genes; use both DEGs and HVGs by default
-    node_source = 'deg,hvg'
-
-    # Whether to take into account the non-1v1 variables as the node features.
-    keep_non1v1_feats = True
-    
-    came_inputs, (adata1, adata2) = pipeline.preprocess_unaligned(
-        adatas,
-        key_class=key_class1,
-        use_scnets=use_scnets,
-        ntop_deg=ntop_deg,
-        ntop_deg_nodes=ntop_deg_nodes,
-        node_source=node_source,
-        )
-
-    outputs = pipeline.main_for_unaligned(
-        **came_inputs,
-        df_varmap=df_varmap,
-        df_varmap_1v1=df_varmap_1v1,
-        dataset_names=dsnames,
-        key_class1=key_class1,
-        key_class2=key_class2,
-        do_normalize=True,
-        keep_non1v1_feats=keep_non1v1_feats,
-        n_epochs=n_epochs,
-        resdir=resdir,
-        n_pass=n_pass,
-        batch_size=batch_size,
-        plot_results=True,
-        device_id = '1',
-        )
-        
-    dpair = outputs['dpair']
-    trainer = outputs['trainer']
-    h_dict = outputs['h_dict']
-    out_cell = outputs['out_cell']
-    predictor = outputs['predictor']
-
-
-    obs_ids1, obs_ids2 = dpair.obs_ids1, dpair.obs_ids2
-    obs = dpair.obs
-    classes = predictor.classes
-    
-    # plot figures
-    y_true = obs['celltype'][obs_ids2].values
-    y_pred = obs['predicted'][obs_ids2].values
-    print(y_true.shape)
-    print(y_pred.shape)
-    print(type(y_true[0]))
-    print(type(y_pred[0]))
-
-    #ax, contmat = pl.plot_contingency_mat(y_true, y_pred, norm_axis=1, order_rows=False, order_cols=False,)
-    #pl._save_with_adjust(ax.figure, figdir / 'contingency_mat.png')
-    #ax.figure
-    
-    name_label = 'celltype'
-    cols_anno = ['celltype', 'predicted'][:]
-    df_probs = obs[list(classes)]
-
-
-    gs = pl.wrapper_heatmap_scores(df_probs.iloc[obs_ids2], obs.iloc[obs_ids2], ignore_index=True,
-    col_label='celltype', col_pred='predicted',
-    n_subsample=50,  # sampling 50 cells for each group
-    cmap_heat='magma_r',
-    fp=figdir + 'heatmap_probas.pdf')
-
-    gs.figure
-    
-    
-    
-    # further analysis
-    hidden_list = came.load_hidden_states(resdir + 'hidden_list.h5')
-    #hidden_list  # a list of dicts
-    h_dict = hidden_list[-1]
-    # the last layer of hidden states
-    
-    adt = pp.make_adata(h_dict['cell'], obs=dpair.obs, assparse=False, ignore_index=True)
-    gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    
-    # UMAP of cell embeddings
-    sc.set_figure_params(dpi_save=200)
-
-    sc.pp.neighbors(adt, n_neighbors=15, metric='cosine', use_rep='X')
-    sc.tl.umap(adt)
-    # sc.pl.umap(adt, color=['dataset', 'celltype'], ncols=1)
-
-    ftype = ['.svg', ''][1]
-    sc.pl.umap(adt, color='dataset', save=f'-dataset{ftype}')
-    sc.pl.umap(adt, color='celltype', save=f'-ctype{ftype}')
-    
-    obs_umap = adt.obsm['X_umap']
-    obs['UMAP1'] = obs_umap[:, 0]
-    obs['UMAP2'] = obs_umap[:, 1]
-    obs.to_csv(resdir + 'obs.csv')
-    adt.write(resdir + 'adt_hidden_cell.h5ad')
-    
-    adata1.obsm['X_umap'] = obs_umap[obs_ids1]
-    adata2.obsm['X_umap'] = obs_umap[obs_ids2]
-    
-    # Umap of genes
-    sc.pp.neighbors(gadt, n_neighbors=15, metric='cosine', use_rep='X')
-
-    # gadt = pp.make_adata(h_dict['gene'], obs=dpair.var.iloc[:, :2], assparse=False, ignore_index=True)
-    sc.tl.umap(gadt)
-    sc.pl.umap(gadt, color='dataset', )
-    
-    # joint gene module extraction
-    sc.tl.leiden(gadt, resolution=.8, key_added='module')
-    sc.pl.umap(gadt, color='module', ncols=1, palette='tab20b')
-    
-    # gadt.obs_names = gadt.obs_names.astype(str)
-    gadt1, gadt2 = pp.bisplit_adata(gadt, 'dataset', dsnames[0], reset_index_by='name')
-
-    color_by = 'module'
-    palette = 'tab20b'
-    sc.pl.umap(gadt1, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-    sc.pl.umap(gadt2, color=color_by, s=10, edges=True, edges_width=0.05,
-           palette=palette,
-           save=f'_{color_by}-{dsnames[0]}')
-           
-    df_var_links = came.weight_linked_vars(
-    gadt.X, dpair._vv_adj, names=dpair.get_vnode_names(),
-    matric='cosine', index_names=dsnames,)
-
-    gadt1.write(resdir + 'adt_hidden_gene1.h5ad')
-    gadt2.write(resdir + 'adt_hidden_gene2.h5ad')
-    
-    # Gene-expression-profiles (for each cell type) on gene UMAP
-    # averaged expressions
-    avg_expr1 = pp.group_mean_adata(adatas[0], groupby=key_class1,features=dpair.vnode_names1, use_raw=True)
-    avg_expr2 = pp.group_mean_adata(adatas[1], groupby=key_class2 if key_class2 else 'predicted',features=dpair.vnode_names2, use_raw=True)
-    
-    plkwds = dict(cmap='RdYlBu_r', vmax=2.5, vmin=-1.5, do_zscore=True,
-              axscale=3, ncols=5, with_cbar=True)
-
-    fig1, axs1 = pl.adata_embed_with_values(gadt1, avg_expr1, fp=figdir + f'umap_exprAvgs-{dsnames[0]}-all.png', **plkwds)
-    fig2, axs2 = pl.adata_embed_with_values(gadt2, avg_expr2, fp=figdir + f'umap_exprAvgs-{dsnames[1]}-all.png', **plkwds)
-    
-    # Abstracted graph
-    norm_ov = ['max', 'zs', None][1]
-    cut_ov = 0.
-
-    groupby_var = 'module'
-    obs_labels1, obs_labels2 = adt.obs['celltype'][dpair.obs_ids1], \
-                           adt.obs['celltype'][dpair.obs_ids2]
-    var_labels1, var_labels2 = gadt1.obs[groupby_var], gadt2.obs[groupby_var]
-
-    sp1, sp2 = 'mouse', 'human'
-    g = came.make_abstracted_graph(
-        obs_labels1, obs_labels2,
-        var_labels1, var_labels2,
-        avg_expr1, avg_expr2,
-        df_var_links,
-        tags_obs=(f'{sp1} ', f'{sp2} '),
-        tags_var=(f'{sp1} module ', f'{sp2} module '),
-        cut_ov=cut_ov,
-        norm_mtd_ov=norm_ov,)
-        
-    #visualization
-    fp_abs = figdir + f'abstracted_graph-{groupby_var}-cut{cut_ov}-{norm_ov}.pdf'
-    ax = pl.plot_multipartite_graph(g, edge_scale=10, figsize=(9, 7.5), alpha=0.5, fp=fp_abs)  # nodelist=nodelist,
-
-    ax.figure
-    
-    came.save_pickle(g, resdir + 'abs_graph.pickle')
-
-    return resdir
-
-
-
-
-    
-    
-    
-    
-