-
Notifications
You must be signed in to change notification settings - Fork 0
/
VarScan2PostProcessing.wdl
104 lines (81 loc) · 3.26 KB
/
VarScan2PostProcessing.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
workflow VarScan2PostProcessing {
    ## Workflow inputs: the VarScan2 VCF files to post-process, one list per variant type.
    Array[File] VarScan2_output_snp_files
    Array[File] VarScan2_output_indel_files

    ## Post-processing covers two types of variants - snps and indels.
    ## Every task below handles both variant types inside a single call;
    ## the three calls run one after another because each consumes the previous call's outputs.

    ## Step 1: concatenate the input VCF files into one VCF per variant type.
    call CombineVarScan2Output {
        input:
            VarScan2_snp_files_list = VarScan2_output_snp_files,
            VarScan2_indel_files_list = VarScan2_output_indel_files
    }

    ## Step 2: filter the combined VCFs down to the de novo mutation (DNM) records.
    call ExtractDNMsVarScan2VCF {
        input:
            VarScan2_snp_VCF_file = CombineVarScan2Output.VarScan2_snp_combined,
            VarScan2_indel_VCF_file = CombineVarScan2Output.VarScan2_indel_combined
    }

    ## Step 3: reduce the DNM VCFs to plain lists of chromosome/position pairs.
    call ListOfDNMs {
        input:
            VarScan2_snp_file = ExtractDNMsVarScan2VCF.VarScan2_snp_DNMs_file,
            VarScan2_indel_file = ExtractDNMsVarScan2VCF.VarScan2_indel_DNMs_file
    }
}
## Output vcf files from VarScan2 caller are combined in this task.
## VarScan2 caller produces vcf files for snps and indels separately for each chromosome.
task CombineVarScan2Output {
    ## Concatenates the given VCF files into a single VCF per variant type using vcf-concat.
    ##
    ## Inputs:
    ##   VarScan2_snp_files_list   - snp VCF files to concatenate (passed space-separated to vcf-concat)
    ##   VarScan2_indel_files_list - indel VCF files to concatenate (passed space-separated to vcf-concat)
    ## Outputs:
    ##   VarScan2_snp_combined     - VarScan2_snp_combined.vcf
    ##   VarScan2_indel_combined   - VarScan2_indel_combined.vcf
    Array[File] VarScan2_snp_files_list
    Array[File] VarScan2_indel_files_list

    ## `set -e` makes the script stop at the first failing vcf-concat. Previously the two
    ## commands were joined with `;`, so a failure while combining the snp files was hidden
    ## by the exit status of the indel command and the task could succeed with an empty or
    ## truncated snp VCF.
    command {
        set -e
        vcf-concat ${sep=" " VarScan2_snp_files_list} > VarScan2_snp_combined.vcf
        vcf-concat ${sep=" " VarScan2_indel_files_list} > VarScan2_indel_combined.vcf
    }

    runtime {
        docker: "pegi3s/vcftools"
        memory: "1GB"
        cpu: 1
        disks: "local-disk"
    }

    output {
        File VarScan2_snp_combined = "VarScan2_snp_combined.vcf"
        File VarScan2_indel_combined = "VarScan2_indel_combined.vcf"
    }
}
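## In this task, variants with PASS filter and the DENOVO flag are extracted from the VCF files,
## keeping only sites where the first two samples are genotyped 0/0 and the third sample is 0/1.
## The retained variants are treated as the real de novo mutations.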
task ExtractDNMsVarScan2VCF {
    ## Filters the snp and indel VCFs with SnpSift, keeping only records that satisfy all of:
    ##   - FILTER is PASS
    ##   - the DENOVO INFO field exists
    ##   - samples 0 and 1 have genotype 0/0 and sample 2 has genotype 0/1
    ## NOTE(review): presumably samples 0/1 are the parents and sample 2 is the child -
    ## confirm against the sample order of the input VCFs.
    ##
    ## Inputs:
    ##   VarScan2_snp_VCF_file    - snp VCF to filter
    ##   VarScan2_indel_VCF_file  - indel VCF to filter
    ## Outputs:
    ##   VarScan2_snp_DNMs_file   - VarScan2_snp_DNMs_file.vcf
    ##   VarScan2_indel_DNMs_file - VarScan2_indel_DNMs_file.vcf
    File VarScan2_snp_VCF_file
    File VarScan2_indel_VCF_file

    ## `set -e` aborts on the first failing SnpSift run. Without it a failure on the snp file
    ## went unnoticed: the redirect still created the output file and the task's exit status
    ## came only from the indel run.
    command {
        set -e
        java -jar /snpEff/SnpSift.jar filter "( FILTER = 'PASS' & exists DENOVO & GEN[0].GT == '0/0' & GEN[1].GT == '0/0' & GEN[2].GT == '0/1')" ${VarScan2_snp_VCF_file} > VarScan2_snp_DNMs_file.vcf
        java -jar /snpEff/SnpSift.jar filter "( FILTER = 'PASS' & exists DENOVO & GEN[0].GT == '0/0' & GEN[1].GT == '0/0' & GEN[2].GT == '0/1')" ${VarScan2_indel_VCF_file} > VarScan2_indel_DNMs_file.vcf
    }

    runtime {
        ## docker: "openjdk:11.0-jdk"
        docker: "gscuser/snpsift:5.0e"
        memory: "2GB"
        cpu: 1
        disks: "local-disk"
    }

    output {
        File VarScan2_snp_DNMs_file = "VarScan2_snp_DNMs_file.vcf"
        File VarScan2_indel_DNMs_file = "VarScan2_indel_DNMs_file.vcf"
    }
}
## In this task, a list of chromosomes and positions of the de novo mutations is produced as output.
## Separate output files are written for snps and indels, one for each of the two input VCF files.
task ListOfDNMs {
    ## Produces, for each input VCF, a sorted and de-duplicated list of "chromosome|position"
    ## entries taken from the first two tab-separated columns of every line starting with "chr".
    ##
    ## Inputs:
    ##   VarScan2_snp_file              - VCF with the snp de novo mutations
    ##   VarScan2_indel_file            - VCF with the indel de novo mutations
    ## Outputs:
    ##   VarScan2_list_of_snps_output   - VarScan2_listof_snps_file.txt
    ##   VarScan2_list_of_indels_output - VarScan2_listof_indels_file.txt
    File VarScan2_snp_file
    File VarScan2_indel_file

    ## The two lists are built by two independent pipelines. The original joined them with a
    ## trailing `| \`, piping the (already redirected, hence empty) output of the first pipeline
    ## into the second grep and discarding the first pipeline's exit status.
    ## `pipefail` is deliberately not enabled: grep exits with status 1 when no line matches,
    ## and an empty list is a valid result.
    command {
        set -e
        grep "^chr" ${VarScan2_snp_file} | cut -f1,2 | sed 's/\t/|/g' | sort -u > VarScan2_listof_snps_file.txt
        grep "^chr" ${VarScan2_indel_file} | cut -f1,2 | sed 's/\t/|/g' | sort -u > VarScan2_listof_indels_file.txt
    }

    runtime {
        docker: "ubuntu:18.04"
        memory: "1GB"
        cpu: 1
        disks: "local-disk"
    }

    output {
        File VarScan2_list_of_snps_output = "VarScan2_listof_snps_file.txt"
        File VarScan2_list_of_indels_output = "VarScan2_listof_indels_file.txt"
    }
}