-
Notifications
You must be signed in to change notification settings - Fork 0
/
cluster_2.py
executable file
·86 lines (73 loc) · 4.62 KB
/
cluster_2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import h5py
import os
import argparse
import numpy as np
import pandas as pd
import scanpy as sc
def cluster(args):
    """
    Build the neighborhood graph, embed it with UMAP, and cluster cells with Leiden.

    Reads the AnnData file at ``args.input``, runs ``sc.pp.neighbors``,
    ``sc.tl.umap`` and ``sc.tl.leiden`` on it, saves a UMAP plot colored by
    ``args.color_gene``, and writes the annotated object to ``args.out``.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options. The following attributes are read:

        - ``input``: path of the ``.h5ad`` file to load.
        - ``out``: path the resulting AnnData object is written to.
        - ``dpi``, ``figsize``: passed to ``sc.settings.set_figure_params``.
        - ``figure_type``: file extension used in the saved plot name.
        - ``show``: passed to ``sc.pl.umap`` as ``show``.
        - ``project``: optional name; when non-empty it is appended to the
          plot file name as ``_<project>``.
        - ``n_neighbors``, ``n_pcs``: passed to ``sc.pp.neighbors``.
        - ``method``, ``metric``: passed to ``sc.pp.neighbors``; when the
          namespace lacks them, ``"umap"`` and ``"euclidean"`` are used.
        - ``resolution``, ``key_added``: passed to ``sc.tl.leiden``.
        - ``color_gene``: passed to ``sc.pl.umap`` as ``color``.

    Returns
    -------
    None
    """
    in_path = args.input  # not named `input`, to avoid shadowing the builtin
    # An empty (or missing) project name adds nothing to the plot file name.
    project_suffix = f"_{args.project}" if args.project else ""
    # The CLI exposes --method/--metric; forward them instead of silently
    # ignoring them. The fallbacks match the parser defaults.
    method = getattr(args, "method", "umap")
    metric = getattr(args, "metric", "euclidean")

    # set scanpy parameters
    sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
    # in scanpy version 1.6.1 tutorial: sc.logging.print_header()
    sc.logging.print_version_and_date()
    sc.logging.print_versions()
    # figsize=None leaves the figure size at its current default
    sc.settings.set_figure_params(dpi=args.dpi, facecolor="white", figsize=args.figsize)

    adata = sc.read_h5ad(in_path)

    # Computing, embedding, and clustering the neighborhood graph
    sc.pp.neighbors(
        adata,
        n_neighbors=args.n_neighbors,
        n_pcs=args.n_pcs,
        method=method,
        metric=metric,
    )
    sc.tl.umap(adata)

    # cluster using the Leiden graph-clustering method
    sc.tl.leiden(adata, resolution=args.resolution, key_added=args.key_added)

    # NOTE(review): color_gene is used as given; if key_added is changed and
    # color_gene still names a different key, the plot presumably fails
    # unless that key already exists in the input file - confirm with callers.
    sc.pl.umap(
        adata,
        color=args.color_gene,
        show=args.show,
        save=f"_after_leiden{project_suffix}.{args.figure_type}",
    )

    adata.write(args.out)
def main():
    """
    Parse the command-line options and run the clustering step.

    Registers every option on an ``argparse`` parser, stores ``cluster`` as
    the handler via ``set_defaults(func=...)``, parses ``sys.argv``, invokes
    the handler with the parsed namespace, and finally prints the namespace.
    """

    def _as_flag(text):
        # Only "true", "1" and "yes" (case-insensitive) count as True.
        return str(text).lower() in ['true', "1", "yes"]

    # Each entry is (flags, keyword arguments for add_argument); the order
    # here is the order in which the options are registered.
    option_table = [
        # basic parameters
        (("-i", "--input"), dict(type=str, help="the path of count_after_QC.h5ad file", default="count_after_QC.h5ad")),
        (("-d", "--dpi"), dict(type=int, help="the resolution of the output figure", default=80)),
        (("-f", "--figure_type"), dict(type=str, help="the export type of plots, e.g., png, pdf, or svg", default="pdf")),
        (("-p", "--project"), dict(type=str, help="the project name", default="")),
        (("-o", "--out"), dict(type=str, help="the file name to save the anndata object", default="after_leiden.h5ad")),
        (("-s", "--figsize"), dict(type=float, nargs=2, help="the size of output figure, use 2 numbers, e.g., 2 2")),
        (("-S", "--show"), dict(type=_as_flag, help="block output figures on the screen by providing no, false, or 0")),
        # umap parameters
        (("-n", "--n_neighbors"), dict(type=int, help="the size of local neiborhood for manifold approximation", default=15)),
        (("-P", "--n_pcs"), dict(type=int, help="the number of PCs to use", default=None)),
        (("-m", "--method"), dict(type=str, help="the method for neighborhood graph, either ‘umap’, ‘gauss’, ‘rapids’", default="umap")),
        (("-M", "--metric"), dict(type=str, help="the metric for neighborhood graph, [‘cityblock’, ‘cosine’, ‘euclidean’, ‘l1’, ‘l2’, ‘manhattan’], Literal[‘braycurtis’, ‘canberra’, ‘chebyshev’, ‘correlation’, ‘dice’, ‘hamming’, ‘jaccard’, ‘kulsinski’, ‘mahalanobis’, ‘minkowski’, ‘rogerstanimoto’, ‘russellrao’, ‘seuclidean’, ‘sokalmichener’, ‘sokalsneath’, ‘sqeuclidean’, ‘yule’],", default="euclidean")),
        # leiden parameters
        (("-r", "--resolution"), dict(type=float, help="the resolution for leiden", default=1.0)),
        # color parameters and key names to be stored in adata
        (("-C", "--color_gene"), dict(type=str, nargs="*", help="define a list of genes (e.g., MAP2 TEME199 TMEM106B), a key of leiden (e.g., 'leiden' or other key_added like 'leiden_0.6'), or both as color hues in umap plot", default="leiden")),
        (("-k", "--key_added"), dict(type=str, help="the key name of a ledien anaysis to be addeed to anndata", default='leiden')),
    ]

    cli = argparse.ArgumentParser(description="Arguments for scRNA-seq Clustering")
    for flags, options in option_table:
        cli.add_argument(*flags, **options)
    cli.set_defaults(func=cluster)

    parsed = cli.parse_args()
    parsed.func(parsed)

    # Echo the effective options after the run finishes.
    print()
    print(f"The arguments are {parsed}")
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()