-
Notifications
You must be signed in to change notification settings - Fork 0
/
VarScan2Pipeline.wdl
145 lines (121 loc) · 4.08 KB
/
VarScan2Pipeline.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import "VarScan2PostProcessing.wdl" as VarScan2_post
workflow VarScan2Pipeline {
    ## Trio BAM files and their index files.
    File father_bam
    File father_bam_bai
    File mother_bam
    File mother_bam_bai
    File child_bam
    File child_bam_bai

    ## Reference inputs. reference_dict is accepted as a workflow input but is
    ## not passed to any call in this workflow.
    File reference
    File reference_fai
    File reference_dict

    ## Chromosome ids are supplied as integers and turned into names by
    ## prepending "chr" to each one.
    Array[Int] chromosome_ids
    Array[String] chromosomes = prefix("chr", chromosome_ids)

    ## Per-chromosome stage: one scatter shard per chromosome name. Each shard
    ## runs SamtoolsMpileup and feeds its mpileup file to Varscan2Caller.
    scatter (chr_name in chromosomes) {
        call SamtoolsMpileup {
            input:
                chromosome = chr_name,
                reference = reference,
                reference_fai = reference_fai,
                father_bam = father_bam,
                father_bam_bai = father_bam_bai,
                mother_bam = mother_bam,
                mother_bam_bai = mother_bam_bai,
                child_bam = child_bam,
                child_bam_bai = child_bam_bai
        }

        call Varscan2Caller {
            input:
                chromosome = chr_name,
                mpileup_file = SamtoolsMpileup.mpileup_file
        }
    }

    ## Post-processing stage for snps and indels, using three tasks from
    ## VarScan2PostProcessing.wdl (imported as VarScan2_post):
    ##   1. CombineVarScan2Output  - combines the per-chromosome VCF files.
    ##   2. ExtractDNMsVarScan2VCF - extracts DENOVO variants from the combined VCFs.
    ##   3. ListOfDNMs             - generates the list of de novo mutations.
    ## Outside the scatter, the Varscan2Caller outputs are the gathered
    ## per-chromosome file lists.
    call VarScan2_post.CombineVarScan2Output as CombineVarScan2Output {
        input:
            VarScan2_snp_files_list = Varscan2Caller.snp_varscan_file,
            VarScan2_indel_files_list = Varscan2Caller.indel_varscan_file
    }

    call VarScan2_post.ExtractDNMsVarScan2VCF as ExtractDNMsVarScan2VCF {
        input:
            VarScan2_snp_VCF_file = CombineVarScan2Output.VarScan2_snp_combined,
            VarScan2_indel_VCF_file = CombineVarScan2Output.VarScan2_indel_combined
    }

    call VarScan2_post.ListOfDNMs as ListOfDNMs {
        input:
            VarScan2_snp_file = ExtractDNMsVarScan2VCF.VarScan2_snp_DNMs_file,
            VarScan2_indel_file = ExtractDNMsVarScan2VCF.VarScan2_indel_DNMs_file
    }

    ## Workflow outputs: the results of each of the three post-processing calls.
    output {
        File VarScan2_snp_combined = CombineVarScan2Output.VarScan2_snp_combined
        File VarScan2_indel_combined = CombineVarScan2Output.VarScan2_indel_combined
        File VarScan2_snp_DNMs_file = ExtractDNMsVarScan2VCF.VarScan2_snp_DNMs_file
        File VarScan2_indel_DNMs_file = ExtractDNMsVarScan2VCF.VarScan2_indel_DNMs_file
        File VarScan2_list_of_snps_output = ListOfDNMs.VarScan2_list_of_snps_output
        File VarScan2_list_of_indels_output = ListOfDNMs.VarScan2_list_of_indels_output
    }
}
## Pre-processing step required by the VarScan2 caller.
## It runs `samtools mpileup` once per chromosome to generate the mpileup input.
## The task requires the trio BAM files, the reference files and a chromosome name.
## One mpileup file, named <chromosome>.mpileup.bcf, is produced per chromosome.
task SamtoolsMpileup {
    ## Runs `samtools mpileup` over the father, mother and child BAM files,
    ## restricted to a single chromosome, and writes one output file.

    ## Region given to samtools with -r; also the prefix of the output file name.
    String chromosome

    ## Reference passed to samtools with -f.
    File reference

    ## BAM files, listed on the command line in father, mother, child order.
    File father_bam
    File mother_bam
    File child_bam

    ## Index files: declared as inputs but never named in the command below.
    ## NOTE(review): presumably staged so samtools can locate them beside the
    ## reference/BAM files - confirm against the backend's localization.
    File reference_fai
    File father_bam_bai
    File mother_bam_bai
    File child_bam_bai

    ## NOTE(review): no flag requesting BCF/VCF output is passed, so the file is
    ## presumably a text pileup despite its ".bcf" extension - confirm.
    command {
        samtools mpileup \
        -r ${chromosome} \
        -B -q 1 \
        -f ${reference} \
        ${father_bam} ${mother_bam} ${child_bam} \
        -o ${chromosome}.mpileup.bcf
    }

    output {
        ## Same file name as the -o argument in the command above.
        File mpileup_file = "${chromosome}.mpileup.bcf"
    }

    runtime {
        docker: "biocontainers/samtools:v1.3.1_cv4"
        memory: "8GB"
        cpu: 2
        disks: "local-disk"
        maxRetries: 3
    }
}
## This task runs the VarScan2 trio caller on one chromosome at a time.
## It takes the mpileup file generated by the SamtoolsMpileup task and the chromosome name.
## It generates separate VCF files for snps and indels for that chromosome.
task Varscan2Caller {
    ## Runs `varscan trio` on the mpileup file of a single chromosome and
    ## exposes the resulting snp and indel VCF files.

    ## Chromosome name; used to build the output basename "<chromosome>_varscan2".
    String chromosome

    ## mpileup file for this chromosome (the workflow passes
    ## SamtoolsMpileup.mpileup_file here).
    File mpileup_file

    runtime {
        docker: "quay.io/biocontainers/varscan:2.4.2--2"
        memory: "4GB"
        cpu: 2
        disks: "local-disk"
        maxRetries: 3
    }

    ## All options use the double-dash form. The two --adj-* options were
    ## previously written with a single dash, unlike the other options in this
    ## command; they are normalized here for consistency.
    command {
        varscan trio ${mpileup_file} ${chromosome}_varscan2 \
        --min-coverage 10 \
        --min-var-freq 0.20 \
        --p-value 0.05 \
        --adj-var-freq 0.05 \
        --adj-p-value 0.15
    }

    output {
        ## Both names are derived from the output basename given to varscan above.
        File snp_varscan_file = "${chromosome}_varscan2.snp.vcf"
        File indel_varscan_file = "${chromosome}_varscan2.indel.vcf"
    }
}