@@ -39,6 +39,7 @@ params.estimate_contamination = null
params.filter_readorientation = null
params.genotype = null
params.ref_RNA = "NO_REF_RNA_FILE"
+ params.ext = "cram"

params.help = null

@@ -131,6 +132,7 @@ log.info '-------------------------------------------------------------'
log.info "genotype = ${params.genotype}"
log.info "ref      = ${params.ref}"
log.info "ref_RNA  = ${params.ref_RNA}"
+ log.info "ext      = ${params.ext}"
}

// load reference
@@ -139,6 +141,11 @@ fasta_ref_fai = file( params.ref+'.fai' )
fasta_ref_gzi = file( params.ref+'.gzi' )
fasta_ref_dict = file( params.ref.replace(".fasta",".dict").replace(".fa",".dict") )

+
+ ext_ind = ".crai"
+ if (params.ext=="bam"){ ext_ind=".bai" }
+
+
if (params.genotype){
if (params.ref_RNA == "NO_REF_RNA_FILE"){
fasta_ref_RNA = file( params.ref )
@@ -190,10 +197,10 @@ if (params.PON) {
if (params.tn_file) {
// FOR INPUT AS A TAB DELIMITED FILE
pairs = Channel.fromPath(params.tn_file).splitCsv(header: true, sep: '\t', strip: true)
- .map{ row -> [ row.sample, file(row.tumor), file(row.tumor+'.bai'), file(row.normal), file(row.normal+'.bai') ] }
+ .map{ row -> [ row.sample, file(row.tumor), file(row.tumor+ext_ind), file(row.normal), file(row.normal+ext_ind) ] }

pairs2 = Channel.fromPath(params.tn_file).splitCsv(header: true, sep: '\t', strip: true)
- .map{ row -> [ row.sample, file(row.tumor), file(row.tumor+'.bai'), file(row.normal), file(row.normal+'.bai') ] }
+ .map{ row -> [ row.sample, file(row.tumor), file(row.tumor+ext_ind), file(row.normal), file(row.normal+ext_ind) ] }

tn_bambai2 = pairs2.groupTuple(by: 0)
.map { row -> tuple(row[0], row[1], row[2], row[3][0], row[4][0]) }
@@ -203,23 +210,24 @@ if (params.tn_file) {

if (params.estimate_contamination){
pairsT4cont = Channel.fromPath(params.tn_file).splitCsv(header: true, sep: '\t', strip: true)
- .map{ row -> [ row.sample, 'T', file(row.tumor), file(row.tumor+'.bai') ] }
+ .map{ row -> [ row.sample, 'T', file(row.tumor), file(row.tumor+ext_ind) ] }
pairsN4cont = Channel.fromPath(params.tn_file).splitCsv(header: true, sep: '\t', strip: true)
- .map{ row -> [ row.sample, 'N', file(row.normal), file(row.normal+'.bai') ] }
+ .map{ row -> [ row.sample, 'N', file(row.normal), file(row.normal+ext_ind) ] }
.unique()
pairs4cont = pairsT4cont.concat( pairsN4cont )
}
} else {
// FOR INPUT AS TWO FOLDER
// recovering of bam files
- tumor_bams = Channel.fromPath( params.tumor_bam_folder+'/*'+params.suffix_tumor+'.bam' )
- .ifEmpty { error "Cannot find any bam file in: ${params.tumor_bam_folder}" }
- .map { path -> [ path.name.replace("${params.suffix_tumor}.bam",""), path ] }
+ ext_ind
+ tumor_bams = Channel.fromPath( params.tumor_bam_folder+'/*'+params.suffix_tumor+'.'+params.ext )
+ .ifEmpty { error "Cannot find any bam/cram file in: ${params.tumor_bam_folder}" }
+ .map { path -> [ path.name.replace("${params.suffix_tumor}."+params.ext,""), path ] }

// recovering of bai files
- tumor_bais = Channel.fromPath( params.tumor_bam_folder+'/*'+params.suffix_tumor+'.bam.bai' )
- .ifEmpty { error "Cannot find any bai file in: ${params.tumor_bam_folder}" }
- .map { path -> [ path.name.replace("${params.suffix_tumor}.bam.bai",""), path ] }
+ tumor_bais = Channel.fromPath( params.tumor_bam_folder+'/*'+params.suffix_tumor+'.'+params.ext+ext_ind )
+ .ifEmpty { error "Cannot find any bai/crai file in: ${params.tumor_bam_folder}" }
+ .map { path -> [ path.name.replace("${params.suffix_tumor}."+params.ext+ext_ind,""), path ] }

// building bam-bai pairs
tumor_bam_bai = tumor_bams
@@ -228,14 +236,14 @@ if (params.tn_file) {

// FOR NORMAL
// recovering of bam files
- normal_bams = Channel.fromPath( params.normal_bam_folder+'/*'+params.suffix_normal+'.bam' )
- .ifEmpty { error "Cannot find any bam file in: ${params.normal_bam_folder}" }
- .map { path -> [ path.name.replace("${params.suffix_normal}.bam",""), path ] }
+ normal_bams = Channel.fromPath( params.normal_bam_folder+'/*'+params.suffix_normal+'.'+params.ext )
+ .ifEmpty { error "Cannot find any bam/cram file in: ${params.normal_bam_folder}" }
+ .map { path -> [ path.name.replace("${params.suffix_normal}."+params.ext,""), path ] }

// recovering of bai files
- normal_bais = Channel.fromPath( params.normal_bam_folder+'/*'+params.suffix_normal+'.bam.bai' )
- .ifEmpty { error "Cannot find any bai file in: ${params.normal_bam_folder}" }
- .map { path -> [ path.name.replace("${params.suffix_normal}.bam.bai",""), path ] }
+ normal_bais = Channel.fromPath( params.normal_bam_folder+'/*'+params.suffix_normal+'.'+params.ext+ext_ind )
+ .ifEmpty { error "Cannot find any bai/crai file in: ${params.normal_bam_folder}" }
+ .map { path -> [ path.name.replace("${params.suffix_normal}."+params.ext+ext_ind,""), path ] }

// building bam-bai pairs
normal_bam_bai = normal_bams
@@ -248,12 +256,12 @@ if (params.tn_file) {
.map {tumor_bb, normal_bb -> [ tumor_bb[0], tumor_bb[1], tumor_bb[2], normal_bb[1], normal_bb[2] ] }
// here each element X of tn_bambai channel is a 4-uplet. X[0] is the tumor bam, X[1] the tumor bai, X[2] the normal bam and X[3] the normal bai.
if (params.estimate_contamination){
- pairsT4cont = Channel.fromPath( params.tumor_bam_folder+'/*'+params.suffix_tumor+'.bam' )
- .map { path -> [ path.name.replace("${params.suffix_tumor}.bam",""), 'T',
- file(path), file(path + '.bai') ] }
- pairsN4cont = Channel.fromPath( params.normal_bam_folder+'/*'+params.suffix_normal+'.bam' )
- .map { path -> [ path.name.replace("${params.suffix_normal}.bam",""), 'N',
- file(path), file(path + '.bai') ] }
+ pairsT4cont = Channel.fromPath( params.tumor_bam_folder+'/*'+params.suffix_tumor+'.'+params.ext )
+ .map { path -> [ path.name.replace("${params.suffix_tumor}."+params.ext,""), 'T',
+ file(path), file(path + ext_ind) ] }
+ pairsN4cont = Channel.fromPath( params.normal_bam_folder+'/*'+params.suffix_normal+'.'+params.ext )
+ .map { path -> [ path.name.replace("${params.suffix_normal}."+params.ext,""), 'N',
+ file(path), file(path + ext_ind) ] }
.unique()
pairs4cont = pairsT4cont.concat( pairsN4cont )
}
@@ -274,8 +282,8 @@ if (params.tn_file) {
if (params.genotype){
pairs2 = Channel.fromPath(params.tn_file).splitCsv(header: true, sep: '\t', strip: true)
.map{ row -> [ row.sample, row.preproc, file(row.tumor),
- file(row.tumor+'.bai'), file(row.normal),
- file(row.normal+'.bai'), file(row.vcf) ] }
+ file(row.tumor+ext_ind), file(row.normal),
+ file(row.normal+ext_ind), file(row.vcf) ] }

pairs2.branch{
bam2preproc: it[1]=="yes"
@@ -299,7 +307,7 @@ process RNAseq_preproc_fixMCNDN_fixMQ{
'''
if [ -L "None" ]; then unlink None; unlink None.bai; touch None;touch None.bai; fi
if [ -L "none" ]; then unlink none; unlink none.bai; touch none;touch none.bai; fi
- SM=`samtools view -H !{bam} | grep SM | head -1 | awk '{print $4}' | cut -c 4-`
+ SM=`samtools view -H !{bam} | grep "^@RG" | head -1 | awk '{print $NF}' | cut -c 4-`
python !{baseDir}/bin/correctNDN.py !{bam} !{sample}_$SM"_MCNDNfixed.bam"
samtools view -H !{sample}_$SM"_MCNDNfixed.bam" | sed -e "s/SM:"$SM"/SM:"$SM"_MCNDNfixed/" | samtools reheader - !{sample}_$SM"_MCNDNfixed.bam" > !{sample}_$SM"_MCNDNfixed_rehead.bam"
samtools index !{sample}_$SM"_MCNDNfixed_rehead.bam" !{sample}_$SM"_MCNDNfixed_rehead.bai"
@@ -323,7 +331,7 @@ process RNAseq_preproc_split{
shell:
new_tag = sample+"_MCNDNfixed_split"
'''
- SM=`samtools view -H !{bam} | grep SM | head -1 | awk '{print $4}' | cut -c 4-`
+ SM=`samtools view -H !{bam} | grep "^@RG" | head -1 | awk '{print $NF}' | cut -c 4-`
gatk SplitNCigarReads --java-options "-Xmx!{params.mem}G -Djava.io.tmpdir=$PWD" --add-output-sam-program-record -fixNDN true -R !{fasta_ref_RNA} -I !{bam} -O !{new_tag}_$SM.bam
'''
}
@@ -376,7 +384,7 @@ process genotype{
}
'''
!{baseDir}/bin/prep_vcf_bed.sh !{known_snp} !{PON}
- normal_name=`samtools view -H !{bamN} | grep SM | head -1 | awk '{print $4}' | cut -c 4-`
+ normal_name=`samtools view -H !{bamN} | grep "^@RG" | head -1 | awk '{print $NF}' | cut -c 4-`
gatk IndexFeatureFile -I !{vcf}
gatk Mutect2 --java-options "-Xmx!{params.mem}G" -R !{fasta_ref} !{known_snp_option} !{PON_option} !{input_t} !{input_n} \
-O !{printed_tag}_genotyped.vcf !{params.mutect_args} --alleles !{vcf} -L regions.bed --disable-read-filter NonChimericOriginalAlignmentReadFilter --disable-read-filter NotDuplicateReadFilter \
@@ -488,7 +496,7 @@ process mutect {
PON_option = " "
}
'''
- normal_name=`samtools view -H !{bamN} | grep SM | head -1 | awk '{print $4}' | cut -c 4-`
+ normal_name=`samtools view -H !{bamN} | grep "^@RG" | head -1 | awk '{print $NF}' | cut -c 4-`
gatk Mutect2 --java-options "-Xmx!{params.mem}G" -R !{fasta_ref} !{known_snp_option} !{PON_option} \
!{input_t} !{input_n} -O !{printed_tag}_calls.vcf -L !{bed} !{params.mutect_args} --f1r2-tar-gz !{printed_tag}_f1r2.tar.gz
'''