-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile_noRtN
254 lines (223 loc) · 7.99 KB
/
Snakefile_noRtN
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
SAMPLES, = glob_wildcards("sequencing/selected_fastqfiles/{sample}.fastq")
rule all:
input:
expand("sequencing/merged/{sample}.merged.bam", sample=SAMPLES)
ruleorder: trim > bwa_mem > samtools_sort > samtools_index > move_firstclean_bam > move_firstclean_bai > cp_secondclean_bam > cp_secondclean_bai > pycision_firstclean > pycision_secondclean > move_firstclean > rename_secondclean > bam2fastq > bwa_mem2 > samtools_sort2 > samtools_index2 > pycision_thirdclean > move_thirdclean > samtools_bam2sam_exceptfirst > samtools_bam2sam_mt162corrected > change_length_exceptfirst > change_length_mt162corrected > sam2bam_exceptfirst > sam2bam_mt162corrected > samtools_sort_exceptfirst > samtools_sort_mt162corrected > samtools_index_exceptfirst > samtools_index_mt162corrected > merge
rule trim:
input:
"sequencing/selected_fastqfiles/{sample}.fastq"
output:
fastq="sequencing/trimmed_fastqfiles/{sample}.trimmed.fastq",
logfile="sequencing/trimmed_fastqfiles/{sample}_logfile"
shell:
"java -jar tools/trimmomatic-0.39.jar SE -phred33 -trimlog {output.logfile} {input} {output.fastq} SLIDINGWINDOW:5:17.5 CROP:160 MINLEN:35"
rule bwa_mem:
input:
fa="sequencing/othermtDNAref/PrecisionID_mtDNA_rCRS.fasta",
fastq=rules.trim.output.fastq
output:
temp("sequencing/trimmed_mappedreads/{sample}.bam")
shell:
"bwa mem {input.fa} {input.fastq} | samtools view -hSb - > {output}"
rule samtools_sort:
input:
rules.bwa_mem.output
output:
"sequencing/trimmed_sortedreads/{sample}.sorted.bam"
shell:
"samtools sort {input} -o {output}"
rule samtools_index:
input:
rules.samtools_sort.output
output:
"sequencing/trimmed_sortedreads/{sample}.sorted.bam.bai"
shell:
"samtools index {input}"
rule move_firstclean_bam:
input:
rules.samtools_sort.output
output:
temp("sequencing/pycision_firstclean/{sample}.exceptfirst_uncut.sorted.bam")
shell:
"cp {input} {output}"
rule move_firstclean_bai:
input:
rules.samtools_index.output
output:
temp("sequencing/pycision_firstclean/{sample}.exceptfirst_uncut.sorted.bam.bai")
shell:
"cp {input} {output}"
rule cp_secondclean_bam:
input:
rules.samtools_sort.output
output:
temp("sequencing/pycision_secondclean/{sample}.justmt162_uncut.sorted.bam")
shell:
"cp {input} {output}"
rule cp_secondclean_bai:
input:
rules.samtools_index.output
output:
temp("sequencing/pycision_secondclean/{sample}.justmt162_uncut.sorted.bam.bai")
shell:
"cp {input} {output}"
rule pycision_firstclean:
input:
bam=rules.move_firstclean_bam.output,
bai=rules.move_firstclean_bai.output
output:
bam=temp("sequencing/pycision_firstclean/{sample}.exceptfirst_uncut.sorted.softClipped.halfway.bam"),
sorted_bam=temp("sequencing/pycision_firstclean/{sample}.exceptfirst_uncut.sorted.softClipped.halfway.sorted.bam"),
bai=temp("sequencing/pycision_firstclean/{sample}.exceptfirst_uncut.sorted.softClipped.halfway.sorted.bam.bai")
shell:
"python3 tools/pycision.py -q 4 -f sequencing/1_exceptfirst.bed {input.bam}"
rule pycision_secondclean:
input:
bam=rules.cp_secondclean_bam.output,
bai=rules.cp_secondclean_bai.output
output:
bam=temp("sequencing/pycision_secondclean/{sample}.justmt162_uncut.sorted.softClipped.halfway.bam"),
sorted_bam=temp("sequencing/pycision_secondclean/{sample}.justmt162_uncut.sorted.softClipped.halfway.sorted.bam"),
bai=temp("sequencing/pycision_secondclean/{sample}.justmt162_uncut.sorted.softClipped.halfway.sorted.bam.bai")
shell:
"python3 tools/pycision.py -q 4 -f sequencing/2_wholemt162.bed {input.bam}"
rule move_firstclean:
input:
rules.pycision_firstclean.output.bam
output:
temp("sequencing/pycision_to_merge/{sample}.exceptfirst_cut.bam")
shell:
"mv {input} {output}"
rule rename_secondclean:
input:
rules.pycision_secondclean.output.bam
output:
temp("sequencing/pycision_secondclean/{sample}.justmt162_cut.bam")
shell:
"mv {input} {output}"
rule bam2fastq:
input:
rules.rename_secondclean.output
output:
temp("sequencing/pycision_secondclean/{sample}.justmt162_cut.fq")
shell:
"bedtools bamtofastq -i {input} -fq {output}"
rule bwa_mem2:
input:
fa="sequencing/referencemtDNA_chrM_exceptlast.fasta",
fastq=rules.bam2fastq.output
output:
temp("sequencing/pycision_secondclean/{sample}.justmt162_realigned.bam")
shell:
"bwa mem {input.fa} {input.fastq} | samtools view -hSb - > {output}"
rule samtools_sort2:
input:
rules.bwa_mem2.output
output:
temp("sequencing/pycision_secondclean/{sample}.justmt162_realigned.sorted.bam")
shell:
"samtools sort {input} -o {output}"
rule samtools_index2:
input:
rules.samtools_sort2.output
output:
temp("sequencing/pycision_secondclean/{sample}.justmt162_realigned.sorted.bam.bai")
shell:
"samtools index {input}"
rule pycision_thirdclean:
input:
bam=rules.samtools_sort2.output,
bai=rules.samtools_index2.output
output:
bam=temp("sequencing/pycision_secondclean/{sample}.justmt162_realigned.sorted.softClipped.halfway.bam"),
sorted_bam=temp("sequencing/pycision_secondclean/{sample}.justmt162_realigned.sorted.softClipped.halfway.sorted.bam"),
bai=temp("sequencing/pycision_secondclean/{sample}.justmt162_realigned.sorted.softClipped.halfway.sorted.bam.bai")
shell:
"python3 tools/pycision.py -q 4 -f sequencing/3_firstonly.bed {input.bam}"
rule move_thirdclean:
input:
rules.pycision_thirdclean.output.bam
output:
temp("sequencing/pycision_to_merge/{sample}.mt162corrected.bam")
shell:
"mv {input} {output}"
rule samtools_bam2sam_exceptfirst:
input:
rules.move_firstclean.output
output:
temp("sequencing/pycision_to_merge/{sample}.exceptfirst_cut.sam")
shell:
"samtools view -h {input} > {output}"
rule samtools_bam2sam_mt162corrected:
input:
rules.move_thirdclean.output
output:
temp("sequencing/pycision_to_merge/{sample}.mt162corrected.sam")
shell:
"samtools view -h {input} > {output}"
rule change_length_exceptfirst:
input:
rules.samtools_bam2sam_exceptfirst.output
output:
temp("sequencing/pycision_to_merge/{sample}.exceptfirst_correctedlength.sam")
shell:
"""awk '{{ gsub("LN:16649", "LN:16569"); print }}' {input} > {output}"""
rule change_length_mt162corrected:
input:
rules.samtools_bam2sam_mt162corrected.output
output:
temp("sequencing/pycision_to_merge/{sample}.mt162corrected_correctedlength.sam")
shell:
"""awk '{{ gsub("LN:16541", "LN:16569"); print }}' {input} > {output}"""
rule sam2bam_exceptfirst:
input:
rules.change_length_exceptfirst.output
output:
temp("sequencing/pycision_to_merge/{sample}.exceptfirst_correctedlength.bam")
shell:
"samtools view -h -S -b -o {output} {input}"
rule sam2bam_mt162corrected:
input:
rules.change_length_mt162corrected.output
output:
temp("sequencing/pycision_to_merge/{sample}.mt162corrected_correctedlength.bam")
shell:
"samtools view -h -S -b -o {output} {input}"
rule samtools_sort_exceptfirst:
input:
rules.sam2bam_exceptfirst.output
output:
temp("sequencing/pycision_to_merge/{sample}.exceptfirst_correctedlength.sorted.bam")
shell:
"samtools sort {input} -o {output}"
rule samtools_sort_mt162corrected:
input:
rules.sam2bam_mt162corrected.output
output:
temp("sequencing/pycision_to_merge/{sample}.mt162corrected_correctedlength.sorted.bam")
shell:
"samtools sort {input} -o {output}"
rule samtools_index_exceptfirst:
input:
rules.samtools_sort_exceptfirst.output
output:
temp("sequencing/pycision_to_merge/{sample}.exceptfirst_correctedlength.sorted.bam.bai")
shell:
"samtools index {input}"
rule samtools_index_mt162corrected:
input:
rules.samtools_sort_mt162corrected.output
output:
temp("sequencing/pycision_to_merge/{sample}.mt162corrected_correctedlength.sorted.bam.bai")
shell:
"samtools index {input}"
rule merge:
input:
exceptfirst_bam=rules.samtools_sort_exceptfirst.output,
exceptfirst_bai=rules.samtools_index_exceptfirst.output,
mt162_bam=rules.samtools_sort_mt162corrected.output,
mt162_bai=rules.samtools_index_mt162corrected.output
output:
protected("sequencing/merged/{sample}.merged.bam")
shell:
"samtools merge {output} {input.mt162_bam} {input.exceptfirst_bam}"