Skip to content

Commit 3e29ec0

Browse files
authored
Merge pull request #31 from IARCbioinfo/dev-adg
version 2.3
2 parents 69c3908 + 4021d9a commit 3e29ec0

File tree

10 files changed

+312
-39
lines changed

10 files changed

+312
-39
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ FROM continuumio/miniconda3:4.7.12
66
LABEL base_image="continuumio/miniconda3"
77
LABEL version="4.7.12"
88
LABEL software="mutect-nf"
9-
LABEL software.version="2.2"
9+
LABEL software.version="2.3"
1010
LABEL about.summary="Container image containing all requirements for mutect-nf"
1111
LABEL about.home="http://github.com/IARCbioinfo/mutect-nf"
1212
LABEL about.documentation="http://github.com/IARCbioinfo/mutect-nf/README.md"

Dockerfile_gatk2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ FROM continuumio/miniconda3:4.7.12
66
LABEL base_image="continuumio/miniconda3"
77
LABEL version="4.7.12"
88
LABEL software="mutect-nf_gatk2"
9-
LABEL software.version="2.2"
9+
LABEL software.version="2.3"
1010
LABEL about.summary="Container image containing all requirements for mutect-nf with gatk2 (mutect1)"
1111
LABEL about.home="http://github.com/IARCbioinfo/mutect-nf"
1212
LABEL about.documentation="http://github.com/IARCbioinfo/mutect-nf/README.md"

Dockerfile_gatk3

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ FROM continuumio/miniconda3:4.7.12
66
LABEL base_image="continuumio/miniconda3"
77
LABEL version="4.7.12"
88
LABEL software="mutect-nf_gatk3"
9-
LABEL software.version="2.2"
9+
LABEL software.version="2.3"
1010
LABEL about.summary="Container image containing all requirements for mutect-nf with gatk3"
1111
LABEL about.home="http://github.com/IARCbioinfo/mutect-nf"
1212
LABEL about.documentation="http://github.com/IARCbioinfo/mutect-nf/README.md"

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# mutect-nf
2+
3+
24
## Mutect pipeline for somatic variant calling with Nextflow
35
[![CircleCI](https://circleci.com/gh/IARCbioinfo/mutect-nf/tree/master.svg?style=svg)](https://circleci.com/gh/IARCbioinfo/mutect-nf/tree/master)
46
[![Docker Hub](https://img.shields.io/badge/docker-ready-blue.svg)](https://hub.docker.com/r/iarcbioinfo/mutect-nf/)

dag.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,8 @@
226226
{ data: { source: 'p49', target: 'p51', label: 'pileupsT0' } },
227227
{ data: { source: 'p50', target: 'p52', label: 'pileupsN' } },
228228
{ data: { source: 'p50', target: 'p54', label: 'pileupsN4pr' } },
229-
{ data: { source: 'p51', target: 'p52', label: 'pileupsT' } },
230229
{ data: { source: 'p51', target: 'p54', label: 'pileupsT4pr' } },
230+
{ data: { source: 'p51', target: 'p52', label: 'pileupsT' } },
231231
{ data: { source: 'p52', target: 'p53'} },
232232
{ data: { source: 'p53', target: 'p57', label: 'pileups4cont' } },
233233
{ data: { source: 'p54', target: 'p55'} },

dag.png

-167 Bytes
Loading

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
- bioconda
55
- defaults
66
dependencies:
7-
- gatk4=4.1.7.0
7+
- gatk4=4.2.0.0
88
- pysam=0.15.2
99
- bedops=2.4.37
1010
- bedtools=2.29.2

mutect.nf

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ params.estimate_contamination = null
3939
params.filter_readorientation = null
4040
params.genotype = null
4141
params.ref_RNA = "NO_REF_RNA_FILE"
42+
params.ext = "cram"
4243

4344
params.help = null
4445

@@ -131,6 +132,7 @@ log.info '-------------------------------------------------------------'
131132
log.info "genotype = ${params.genotype}"
132133
log.info "ref = ${params.ref}"
133134
log.info "ref_RNA = ${params.ref_RNA}"
135+
log.info "ext = ${params.ext}"
134136
}
135137

136138
//load reference
@@ -139,6 +141,11 @@ fasta_ref_fai = file( params.ref+'.fai' )
139141
fasta_ref_gzi = file( params.ref+'.gzi' )
140142
fasta_ref_dict = file( params.ref.replace(".fasta",".dict").replace(".fa",".dict") )
141143

144+
145+
ext_ind = ".crai"
146+
if(params.ext=="bam"){ ext_ind=".bai"}
147+
148+
142149
if(params.genotype){
143150
if(params.ref_RNA == "NO_REF_RNA_FILE"){
144151
fasta_ref_RNA = file( params.ref )
@@ -190,10 +197,10 @@ if (params.PON) {
190197
if (params.tn_file) {
191198
// FOR INPUT AS A TAB DELIMITED FILE
192199
pairs = Channel.fromPath(params.tn_file).splitCsv(header: true, sep: '\t', strip: true)
193-
.map{ row -> [ row.sample , file(row.tumor), file(row.tumor+'.bai'), file(row.normal), file(row.normal+'.bai') ] }
200+
.map{ row -> [ row.sample , file(row.tumor), file(row.tumor+ext_ind), file(row.normal), file(row.normal+ext_ind) ] }
194201

195202
pairs2 = Channel.fromPath(params.tn_file).splitCsv(header: true, sep: '\t', strip: true)
196-
.map{ row -> [ row.sample , file(row.tumor), file(row.tumor+'.bai'), file(row.normal), file(row.normal+'.bai') ] }
203+
.map{ row -> [ row.sample , file(row.tumor), file(row.tumor+ext_ind), file(row.normal), file(row.normal+ext_ind) ] }
197204

198205
tn_bambai2 = pairs2.groupTuple(by: 0)
199206
.map { row -> tuple(row[0] , row[1], row[2] , row[3][0] , row[4][0] ) }
@@ -203,23 +210,24 @@ if (params.tn_file) {
203210

204211
if(params.estimate_contamination){
205212
pairsT4cont = Channel.fromPath(params.tn_file).splitCsv(header: true, sep: '\t', strip: true)
206-
.map{ row -> [ row.sample , 'T' , file(row.tumor), file(row.tumor+'.bai') ] }
213+
.map{ row -> [ row.sample , 'T' , file(row.tumor), file(row.tumor+ext_ind) ] }
207214
pairsN4cont = Channel.fromPath(params.tn_file).splitCsv(header: true, sep: '\t', strip: true)
208-
.map{ row -> [ row.sample , 'N', file(row.normal), file(row.normal+'.bai') ] }
215+
.map{ row -> [ row.sample , 'N', file(row.normal), file(row.normal+ext_ind) ] }
209216
.unique()
210217
pairs4cont = pairsT4cont.concat( pairsN4cont )
211218
}
212219
} else {
213220
// FOR INPUT AS TWO FOLDERS
214221
// recovering of bam files
215-
tumor_bams = Channel.fromPath( params.tumor_bam_folder+'/*'+params.suffix_tumor+'.bam' )
216-
.ifEmpty { error "Cannot find any bam file in: ${params.tumor_bam_folder}" }
217-
.map { path -> [ path.name.replace("${params.suffix_tumor}.bam",""), path ] }
222+
ext_ind
223+
tumor_bams = Channel.fromPath( params.tumor_bam_folder+'/*'+params.suffix_tumor+'.'+params.ext )
224+
.ifEmpty { error "Cannot find any bam/cram file in: ${params.tumor_bam_folder}" }
225+
.map { path -> [ path.name.replace("${params.suffix_tumor}."+params.ext,""), path ] }
218226

219227
// recovering of bai files
220-
tumor_bais = Channel.fromPath( params.tumor_bam_folder+'/*'+params.suffix_tumor+'.bam.bai' )
221-
.ifEmpty { error "Cannot find any bai file in: ${params.tumor_bam_folder}" }
222-
.map { path -> [ path.name.replace("${params.suffix_tumor}.bam.bai",""), path ] }
228+
tumor_bais = Channel.fromPath( params.tumor_bam_folder+'/*'+params.suffix_tumor+'.'+params.ext+ext_ind )
229+
.ifEmpty { error "Cannot find any bai/crai file in: ${params.tumor_bam_folder}" }
230+
.map { path -> [ path.name.replace("${params.suffix_tumor}."+params.ext+ext_ind,""), path ] }
223231

224232
// building bam-bai pairs
225233
tumor_bam_bai = tumor_bams
@@ -228,14 +236,14 @@ if (params.tn_file) {
228236

229237
// FOR NORMAL
230238
// recovering of bam files
231-
normal_bams = Channel.fromPath( params.normal_bam_folder+'/*'+params.suffix_normal+'.bam' )
232-
.ifEmpty { error "Cannot find any bam file in: ${params.normal_bam_folder}" }
233-
.map { path -> [ path.name.replace("${params.suffix_normal}.bam",""), path ] }
239+
normal_bams = Channel.fromPath( params.normal_bam_folder+'/*'+params.suffix_normal+'.'+params.ext )
240+
.ifEmpty { error "Cannot find any bam/cram file in: ${params.normal_bam_folder}" }
241+
.map { path -> [ path.name.replace("${params.suffix_normal}."+params.ext,""), path ] }
234242

235243
// recovering of bai files
236-
normal_bais = Channel.fromPath( params.normal_bam_folder+'/*'+params.suffix_normal+'.bam.bai' )
237-
.ifEmpty { error "Cannot find any bai file in: ${params.normal_bam_folder}" }
238-
.map { path -> [ path.name.replace("${params.suffix_normal}.bam.bai",""), path ] }
244+
normal_bais = Channel.fromPath( params.normal_bam_folder+'/*'+params.suffix_normal+'.'+params.ext+ext_ind )
245+
.ifEmpty { error "Cannot find any bai/crai file in: ${params.normal_bam_folder}" }
246+
.map { path -> [ path.name.replace("${params.suffix_normal}."+params.ext+ext_ind,""), path ] }
239247

240248
// building bam-bai pairs
241249
normal_bam_bai = normal_bams
@@ -248,12 +256,12 @@ if (params.tn_file) {
248256
.map {tumor_bb, normal_bb -> [ tumor_bb[0], tumor_bb[1], tumor_bb[2], normal_bb[1], normal_bb[2] ] }
249257
// here each element X of tn_bambai channel is a 5-uplet. X[0] is the sample key (tumor_bb[0]), X[1] the tumor bam/cram, X[2] the tumor index (bai/crai), X[3] the normal bam/cram and X[4] the normal index (bai/crai).
250258
if(params.estimate_contamination){
251-
pairsT4cont = Channel.fromPath( params.tumor_bam_folder+'/*'+params.suffix_tumor+'.bam' )
252-
.map { path -> [ path.name.replace("${params.suffix_tumor}.bam",""), 'T',
253-
file(path), file(path + '.bai') ] }
254-
pairsN4cont = Channel.fromPath( params.normal_bam_folder+'/*'+params.suffix_normal+'.bam' )
255-
.map { path -> [ path.name.replace("${params.suffix_normal}.bam",""), 'N',
256-
file(path), file(path + '.bai') ] }
259+
pairsT4cont = Channel.fromPath( params.tumor_bam_folder+'/*'+params.suffix_tumor+'.'+params.ext )
260+
.map { path -> [ path.name.replace("${params.suffix_tumor}."+params.ext,""), 'T',
261+
file(path), file(path +ext_ind) ] }
262+
pairsN4cont = Channel.fromPath( params.normal_bam_folder+'/*'+params.suffix_normal+'.'+params.ext )
263+
.map { path -> [ path.name.replace("${params.suffix_normal}."+params.ext,""), 'N',
264+
file(path), file(path +ext_ind ) ] }
257265
.unique()
258266
pairs4cont = pairsT4cont.concat( pairsN4cont )
259267
}
@@ -274,8 +282,8 @@ if (params.tn_file) {
274282
if(params.genotype){
275283
pairs2 = Channel.fromPath(params.tn_file).splitCsv(header: true, sep: '\t', strip: true)
276284
.map{ row -> [ row.sample , row.preproc, file(row.tumor),
277-
file(row.tumor+'.bai'), file(row.normal),
278-
file(row.normal+'.bai'), file(row.vcf) ] }
285+
file(row.tumor+ext_ind), file(row.normal),
286+
file(row.normal+ext_ind), file(row.vcf) ] }
279287

280288
pairs2.branch{
281289
bam2preproc: it[1]=="yes"
@@ -299,7 +307,7 @@ process RNAseq_preproc_fixMCNDN_fixMQ{
299307
'''
300308
if [ -L "None" ]; then unlink None; unlink None.bai; touch None;touch None.bai; fi
301309
if [ -L "none" ]; then unlink none; unlink none.bai; touch none;touch none.bai; fi
302-
SM=`samtools view -H !{bam} | grep SM | head -1 | awk '{print $4}' | cut -c 4-`
310+
SM=`samtools view -H !{bam} | grep "^@RG" | head -1 | awk '{print $NF}' | cut -c 4-`
303311
python !{baseDir}/bin/correctNDN.py !{bam} !{sample}_$SM"_MCNDNfixed.bam"
304312
samtools view -H !{sample}_$SM"_MCNDNfixed.bam" | sed -e "s/SM:"$SM"/SM:"$SM"_MCNDNfixed/" | samtools reheader - !{sample}_$SM"_MCNDNfixed.bam" > !{sample}_$SM"_MCNDNfixed_rehead.bam"
305313
samtools index !{sample}_$SM"_MCNDNfixed_rehead.bam" !{sample}_$SM"_MCNDNfixed_rehead.bai"
@@ -323,7 +331,7 @@ process RNAseq_preproc_split{
323331
shell:
324332
new_tag = sample+"_MCNDNfixed_split"
325333
'''
326-
SM=`samtools view -H !{bam} | grep SM | head -1 | awk '{print $4}' | cut -c 4-`
334+
SM=`samtools view -H !{bam} | grep "^@RG" | head -1 | awk '{print $NF}' | cut -c 4-`
327335
gatk SplitNCigarReads --java-options "-Xmx!{params.mem}G -Djava.io.tmpdir=$PWD" --add-output-sam-program-record -fixNDN true -R !{fasta_ref_RNA} -I !{bam} -O !{new_tag}_$SM.bam
328336
'''
329337
}
@@ -376,7 +384,7 @@ process genotype{
376384
}
377385
'''
378386
!{baseDir}/bin/prep_vcf_bed.sh !{known_snp} !{PON}
379-
normal_name=`samtools view -H !{bamN} | grep SM | head -1 | awk '{print $4}' | cut -c 4-`
387+
normal_name=`samtools view -H !{bamN} | grep "^@RG" | head -1 | awk '{print $NF}' | cut -c 4-`
380388
gatk IndexFeatureFile -I !{vcf}
381389
gatk Mutect2 --java-options "-Xmx!{params.mem}G" -R !{fasta_ref} !{known_snp_option} !{PON_option} !{input_t} !{input_n} \
382390
-O !{printed_tag}_genotyped.vcf !{params.mutect_args} --alleles !{vcf} -L regions.bed --disable-read-filter NonChimericOriginalAlignmentReadFilter --disable-read-filter NotDuplicateReadFilter \
@@ -488,7 +496,7 @@ process mutect {
488496
PON_option = ""
489497
}
490498
'''
491-
normal_name=`samtools view -H !{bamN} | grep SM | head -1 | awk '{print $4}' | cut -c 4-`
499+
normal_name=`samtools view -H !{bamN} | grep "^@RG" | head -1 | awk '{print $NF}' | cut -c 4-`
492500
gatk Mutect2 --java-options "-Xmx!{params.mem}G" -R !{fasta_ref} !{known_snp_option} !{PON_option} \
493501
!{input_t} !{input_n} -O !{printed_tag}_calls.vcf -L !{bed} !{params.mutect_args} --f1r2-tar-gz !{printed_tag}_f1r2.tar.gz
494502
'''

0 commit comments

Comments
 (0)