# config.yaml
# General settings

# Output directory path; it must end with a '/'.
outdir: 'out/'

# Reference files located under KOunt_databases_v1/.
diamond_db: 'KOunt_databases_v1/KOunt_AA_1.dmnd'
mmseq_db: 'KOunt_databases_v1/KOunt_RNA_1.mmseq'
combined_bdg: 'KOunt_databases_v1/KOunt_all_1.bedgraph'
kallisto: 'KOunt_databases_v1/KOunt_RNA_1.kallisto'

# Proteins that have hits with multiple KOs can either have their coverage
# split between those KOs or be grouped. The two keys below are a pair:
#   splitting -> splitting_multiples: ''   and grouping_multiples: '#'
#   grouping  -> splitting_multiples: '#'  and grouping_multiples: ''
splitting_multiples: ''
grouping_multiples: '#'

# Whether to check that the fastqs have unique read ids. Also a pair:
#   checking     -> checking_fqs: ''   and not_checking_fqs: '#'
#   not checking -> checking_fqs: '#'  and not_checking_fqs: ''
checking_fqs: ''
not_checking_fqs: '#'
# rule trim

# Directory containing the raw reads; it must end with a '/'.
raw_reads: 'test_fastqs/'

# File extension that follows the sample id, for the R1 and R2 files.
r1_ext: '_R1.fastq.gz'
r2_ext: '_R2.fastq.gz'

# Adapter sequences for R1 and R2.
r1ad: 'AGATCGGAAGAGC'
r2ad: 'AGATCGGAAGAGC'

# Enables polyG tail trimming; set to '' if not required.
polyg: '-g'

# Disables quality trimming; set to '' if not required.
qual: '-Q'

# Minimum read length allowed.
minlen: '50'

# Minimum length used to detect overlapped regions.
overlap: '5'

# Number of threads.
trim_threads: '4'
# rule megahit

# Memory allocated to megahit.
mega_mem: '4.8e+10'

# Number of threads.
mega_threads: '1'

# K-mer sizes, comma separated.
mega_kmers: '27,37,47,57,67,77,87'

# Minimum contig length.
mega_len: '300'

# rule bwa

# Number of threads.
bwa_threads: '4'

# rule coverage

# Filtering of the results by coverage evenness is controlled by a pair of
# keys that must always hold opposite values:
#   filter        -> evenness_yes: ''   and evenness_no: '#'
#   do not filter -> evenness_yes: '#'  and evenness_no: ''
evenness_yes: ''
evenness_no: '#'

# Number of contigs to include in the split bams. Decrease this number if you
# are struggling with memory issues.
cov_split: '5000'
# rule kegg_db

# Directory where the kofam profiles directory and list file are stored.
# Leave as the default to download the most recent version.
db: 'out/Kofamscan/kofam/'

# rule kofamscan

# Number of threads.
kofamscan_threads: '4'

# rule kofamscan_results

# Evenness percentage threshold.
# NOTE(review): the default of 0.95 looks like a fraction rather than a
# percentage - confirm the expected scale.
evenness_pid: '0.95'

# rule cdhit

# Maximum memory allocated to cdhit.
cdhit_mem: '32000'

# Number of threads.
cdhit_threads: '8'

# rule split_keggs

# Number of chunks to split the KOs into. If this is changed, the config for
# split needs amending to the new number of chunks.
# NOTE(review): presumably that refers to the `split` entries in this file -
# confirm.
split_num: '10'

# rule mmseq_keggs

# Number of threads.
mmseq_keggs_threads: '8'

# rule mmseq_nohit

# Number of threads.
mmseq_nohit_threads: '8'

# rule mmseq_cluster_count

# Number of threads.
mmseq_cluster_threads: '8'

# Maximum memory allocated to mmseq.
mmseq_cluster_mem: '128000'
# rule cdhit_all

# Directory to write the concatenated proteins to temporarily.
tmp_cat: ''

# mmseq_all

# Number of threads.
mmseq_all_threads: '8'

# kofamscan_split

# Directory to write the split kofamscan results to temporarily.
kofamscan_split: 'split_kofamscan/'

# diamond_search

# Number of threads.
dia_threads: '8'

# Minimum and maximum percentage of the reference hit that the protein has to
# be.
# NOTE(review): presumably protein length relative to the hit - confirm.
min_qc: '90'
max_qc: '110'

# Minimum percentage identity.
min_pid: '80'

# barrnap

# Number of threads.
barrnap: '4'

# nohit_annotate_reads

# Number of threads.
nohit: '8'

# kallisto

# Number of threads.
kallisto_threads: '8'

# unmapped_reads

# Number of threads.
unmapped_threads: '8'
# Chunk identifiers, one entry per chunk ('00' to '09', ten in total).
# The entries must be indented beneath `split:` so that they form its mapping;
# at the same indentation as `split:` they would parse as separate top-level
# keys and `split` itself would be null.
# NOTE(review): the split_num comment says the split config must be amended
# when the number of chunks changes - presumably this mapping; confirm.
split:
  '00': '00'
  '01': '01'
  '02': '02'
  '03': '03'
  '04': '04'
  '05': '05'
  '06': '06'
  '07': '07'
  '08': '08'
  '09': '09'