Skip to content

Commit

Permalink
Merge pull request #66 from Cristianetaniguti/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
Cristianetaniguti authored Jan 21, 2023
2 parents acca101 + 09c04ef commit 05f5913
Show file tree
Hide file tree
Showing 117 changed files with 43,398 additions and 311 deletions.
3 changes: 1 addition & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ jobs:
- checkout

- run: pip3 install -r tests/requirements.txt
- run: pytest --git-aware --tag freebayes tests/
- run: pytest --git-aware --tag gatk tests/
- run: pytest --git-aware tests/subworkflows/genotyping_empirical/freebayes/polyrad

release-to-github:
<<: *machine_default
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@ figures_gabaritos
**/__pycache__/**
import_paths.txt
releases/
2023*
_LAST
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,4 @@ Check more information and examples of usage in:
- [updog](https://github.com/dcgerard/updog) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Flexible Genotyping of Polyploids using Next Generation Sequencing Data
- [polyRAD](https://github.com/lvclark/polyRAD) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Genotype Calling with Uncertainty from Sequencing Data in Polyploids
- [Reads2MapApp](https://github.com/Cristianetaniguti/Reads2MapApp) in [cristaniguti/reads2mapApp:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Shiny app to evaluate Reads2Map workflows results
- [simuscopR](https://github.com/Cristianetaniguti/simuscopR) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Wrap-up R package for SimusCop simulations.
- [simuscopR](https://github.com/Cristianetaniguti/simuscopR) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Wrap-up R package for SimusCop simulations
15 changes: 15 additions & 0 deletions pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
# 1.2.0

* Add MAPpoly to build linkage maps for polyploid species
* Adjust runtimes
* Add polyploid dataset for tests

# 1.1.0

2022-12-05

* Include optional values to skip some of the sub-workflows
* Accepts any number of VCF files as input
* Dosage calling with updog, polyRAD and SuperMASSA adapted to polyploids
* More tests and example data sets included

# 1.0.0

Initial release
Expand Down
234 changes: 149 additions & 85 deletions pipelines/EmpiricalMaps/EmpiricalMaps.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,166 +6,230 @@ import "../../structs/population_structs.wdl"
import "../../tasks/utils.wdl" as utils
import "../../tasks/utilsR.wdl" as utilsR
import "../../tasks/JointReports.wdl" as reports
import "../../tasks/mappoly.wdl" as mappoly_task

import "../../subworkflows/genotyping_empirical.wdl" as genotyping
import "../../subworkflows/snpcaller_maps_empirical.wdl" as snpcaller
import "../../subworkflows/gusmap_maps_empirical.wdl" as gusmap
import "../../subworkflows/mappoly_maps_empirical.wdl" as mappoly_sub

workflow Maps {

input {
Dataset dataset
Array[File] vcfs
Array[String] vcfs_software
Array[String] vcfs_counts_source
Boolean filter_noninfo
String replaceADbyMissing
File? gatk_vcf_multi
String gatk_mchap
File? gatk_vcf
File? freebayes_vcf
File? gatk_vcf_bam_counts
File? freebayes_vcf_bam_counts
String? filters
Int max_cores
Int ploidy
}

if (defined(filters)) {
call utils.ApplyRandomFilters {
call utils.ApplyRandomFiltersArray {
input:
gatk_vcf = gatk_vcf,
freebayes_vcf = freebayes_vcf,
gatk_vcf_bam_counts = gatk_vcf_bam_counts,
freebayes_vcf_bam_counts = freebayes_vcf_bam_counts,
vcfs = vcfs,
vcfs_software = vcfs_software,
vcfs_counts_source = vcfs_counts_source,
filters = filters,
chromosome = dataset.chromosome
}
}

File filtered_gatk_vcf = select_first([ApplyRandomFilters.gatk_vcf_filt, gatk_vcf])
File filtered_gatk_vcf_bamcounts = select_first([ApplyRandomFilters.gatk_vcf_bam_counts_filt, gatk_vcf_bam_counts])
File filtered_freebayes_vcf = select_first([ApplyRandomFilters.freebayes_vcf_filt, freebayes_vcf])
File filtered_freebayes_vcf_bamcounts = select_first([ApplyRandomFilters.freebayes_vcf_bam_counts_filt, freebayes_vcf_bam_counts])

PopulationAnalysis gatk_processing = {"method": "gatk", "vcf": filtered_gatk_vcf, "bam": filtered_gatk_vcf_bamcounts}
PopulationAnalysis freebayes_processing = {"method": "freebayes", "vcf": filtered_freebayes_vcf, "bam": filtered_freebayes_vcf_bamcounts}
Array[File] filtered_vcfs = select_first([ApplyRandomFiltersArray.vcfs_filt, vcfs])

# Re-Genotyping with updog, supermassa and polyrad; and building maps with onemap
scatter (analysis in [gatk_processing, freebayes_processing]) {

Map[String, File] vcfs = {"vcf": analysis.vcf, "bam": analysis.bam}

scatter (origin in ["vcf", "bam"]) {
scatter (idx in range(length(filtered_vcfs))) {

call utils.SplitMarkers as splitgeno {
input:
vcf_file = vcfs[origin]
}

# Suggestion to improve performance of SuperMASSA, polyRAD and updog
call utilsR.FilterSegregation {
call utils.SplitMarkers as splitgeno {
input:
vcf_file = filtered_vcfs[idx]
}

# Suggestion to improve performance of SuperMASSA, polyRAD and updog
if(filter_noninfo){
call utilsR.RemoveNonInformative {
input:
vcf_file = splitgeno.biallelics,
parent1 = dataset.parent1,
parent2 = dataset.parent2
parent2 = dataset.parent2,
replaceADbyMissing = replaceADbyMissing
}
}

File vcf_up = select_first([RemoveNonInformative.vcf_filtered, splitgeno.biallelics])

if(ploidy == 2) {
call genotyping.onemapMapsEmp as updogMaps {
input:
vcf_file = FilterSegregation.vcf_filtered,
# vcf_file = splitgeno.biallelics,
SNPCall_program = analysis.method,
vcf_file = vcf_up,
SNPCall_program = vcfs_software[idx],
GenotypeCall_program = "updog",
CountsFrom = origin,
CountsFrom = vcfs_counts_source[idx],
cross = dataset.cross,
parent1 = dataset.parent1,
parent2 = dataset.parent2,
chromosome = dataset.chromosome,
multiallelics = dataset.multiallelics,
multiallelics_file = splitgeno.multiallelics,
max_cores = max_cores
max_cores = max_cores,
ploidy = ploidy
}

call genotyping.onemapMapsEmp as supermassaMaps {
input:
vcf_file = FilterSegregation.vcf_filtered,
# vcf_file = splitgeno.biallelics,
SNPCall_program = analysis.method,
vcf_file = vcf_up,
SNPCall_program = vcfs_software[idx],
GenotypeCall_program = "supermassa",
CountsFrom = origin,
CountsFrom = vcfs_counts_source[idx],
cross = dataset.cross,
parent1 = dataset.parent1,
parent2 = dataset.parent2,
chromosome = dataset.chromosome,
multiallelics = dataset.multiallelics,
multiallelics_file = splitgeno.multiallelics,
max_cores = max_cores
max_cores = max_cores,
ploidy = ploidy
}

call genotyping.onemapMapsEmp as polyradMaps {
input:
vcf_file = FilterSegregation.vcf_filtered,
# vcf_file = splitgeno.biallelics,
SNPCall_program = analysis.method,
vcf_file = vcf_up,
SNPCall_program = vcfs_software[idx],
GenotypeCall_program = "polyrad",
CountsFrom = origin,
CountsFrom = vcfs_counts_source[idx],
cross = dataset.cross,
parent1 = dataset.parent1,
parent2 = dataset.parent2,
chromosome = dataset.chromosome,
multiallelics = dataset.multiallelics,
multiallelics_file = splitgeno.multiallelics,
max_cores = max_cores,
ploidy = ploidy
}

# Build maps with GUSMap
call gusmap.gusmapMapsEmp {
input:
vcf_file = vcf_up,
SNPCall_program = vcfs_software[idx],
CountsFrom = vcfs_counts_source[idx],
GenotypeCall_program = "gusmap",
parent1 = dataset.parent1,
parent2 = dataset.parent2,
max_cores = max_cores
}

if(vcfs_counts_source[idx] != "bam"){
call snpcaller.SNPCallerMapsEmp {
input:
vcf_file = splitgeno.biallelics,
cross = dataset.cross,
SNPCall_program = vcfs_software[idx],
GenotypeCall_program = "SNPCaller",
CountsFrom = vcfs_counts_source[idx],
parent1 = dataset.parent1,
parent2 = dataset.parent2,
chromosome = dataset.chromosome,
multiallelics = dataset.multiallelics,
max_cores = max_cores,
multiallelics_file = splitgeno.multiallelics,
multiallelics_mchap = gatk_vcf_multi,
mchap = gatk_mchap
}
}
}

call utils.SplitMarkers as splitvcf {
input:
vcf_file = analysis.vcf
}
if(ploidy > 2){
call mappoly_sub.MappolyMapsEmp as updogPolyMaps {
input:
vcf_file = vcf_up,
SNPCall_program = vcfs_software[idx],
GenotypeCall_program = "updog",
CountsFrom = vcfs_counts_source[idx],
cross = "F1",
parent1 = dataset.parent1,
parent2 = dataset.parent2,
max_cores = max_cores,
ploidy = ploidy
}

call utils.SplitMarkers as splitbam {
input:
vcf_file = analysis.bam
}
call mappoly_sub.MappolyMapsEmp as polyradPolyMaps {
input:
vcf_file = vcf_up,
SNPCall_program = vcfs_software[idx],
GenotypeCall_program = "polyrad",
CountsFrom = vcfs_counts_source[idx],
cross = "F1",
parent1 = dataset.parent1,
parent2 = dataset.parent2,
max_cores = max_cores,
ploidy = ploidy
}

# Build maps with GUSMap
call gusmap.gusmapMapsEmp {
input:
vcf_file = splitvcf.biallelics,
vcf_bam_file = splitbam.biallelics,
SNPCall_program = analysis.method,
GenotypeCall_program = "gusmap",
parent1 = dataset.parent1,
parent2 = dataset.parent2,
max_cores = max_cores
call mappoly_sub.MappolyMapsEmp as supermassaPolyMaps {
input:
vcf_file = vcf_up,
SNPCall_program = vcfs_software[idx],
GenotypeCall_program = "supermassa",
CountsFrom = vcfs_counts_source[idx],
cross = "F1",
parent1 = dataset.parent1,
parent2 = dataset.parent2,
max_cores = max_cores,
ploidy = ploidy
}

if(vcfs_counts_source[idx] != "bam"){
call mappoly_task.MappolyReport {
input:
vcf_file = vcf_up,
SNPCall_program = vcfs_software[idx],
GenotypeCall_program = "SNPCaller",
CountsFrom = vcfs_counts_source[idx],
parent1 = dataset.parent1,
parent2 = dataset.parent2,
max_cores = max_cores,
ploidy = ploidy
}
}
}
}

if(ploidy == 2){
Array[File] snpcaller_results = select_all(SNPCallerMapsEmp.tar_gz_report)

call snpcaller.SNPCallerMapsEmp {
# Compress files
call reports.JointReports {
input:
vcf_file = splitvcf.biallelics,
cross = dataset.cross,
SNPCall_program = analysis.method,
GenotypeCall_program = "SNPCaller",
CountsFrom = "vcf",
parent1 = dataset.parent1,
parent2 = dataset.parent2,
chromosome = dataset.chromosome,
multiallelics = dataset.multiallelics,
max_cores = max_cores,
multiallelics_file = splitvcf.multiallelics,
multiallelics_mchap = gatk_vcf_multi,
mchap = gatk_mchap
SNPCaller = snpcaller_results,
updog = updogMaps.tar_gz_report,
polyrad = polyradMaps.tar_gz_report,
supermassa = supermassaMaps.tar_gz_report,
gusmap = gusmapMapsEmp.tar_gz_report,
max_cores = max_cores
}
}

# Compress files
call reports.JointReports {
input:
SNPCaller = SNPCallerMapsEmp.tar_gz_report,
updog = flatten(updogMaps.tar_gz_report),
polyrad = flatten(polyradMaps.tar_gz_report),
supermassa = flatten(supermassaMaps.tar_gz_report),
gusmap = gusmapMapsEmp.tar_gz_report,
max_cores = max_cores
if(ploidy > 2){
Array[File] snpcaller_results_poly = select_all(MappolyReport.results)

call reports.JointReportsPoly {
input:
SNPCaller = snpcaller_results_poly,
updog = updogPolyMaps.tar_gz_report,
polyrad = polyradPolyMaps.tar_gz_report,
supermassa = supermassaPolyMaps.tar_gz_report
}
}

File Empirical_results_sele = select_first([JointReports.EmpiricalReads_results, JointReportsPoly.EmpiricalReads_results])

output {
File EmpiricalReads_results = JointReports.EmpiricalReads_results
File EmpiricalReads_results = Empirical_results_sele
}
}
12 changes: 7 additions & 5 deletions pipelines/EmpiricalReads2Map/EmpiricalReads2Map.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ workflow EmpiricalReads {
Boolean gatk_mchap = false
Boolean hardfilters = true
Boolean replaceAD = true
String replaceADbyMissing = "TRUE" # Boolean inside R
Boolean run_gatk = true
Boolean run_freebayes = true
Int ploidy = 2
Expand Down Expand Up @@ -45,14 +46,15 @@ workflow EmpiricalReads {
call maps.Maps {
input:
dataset = dataset,
vcfs = SNPCalling.vcfs,
vcfs_software = SNPCalling.vcfs_software,
vcfs_counts_source = SNPCalling.vcfs_counts_source,
gatk_vcf_multi = SNPCalling.gatk_multi_vcf,
gatk_mchap = gatk_mchap,
gatk_vcf = SNPCalling.gatk_vcf,
freebayes_vcf = SNPCalling.freebayes_vcf,
gatk_vcf_bam_counts = SNPCalling.gatk_vcf_bam_count,
freebayes_vcf_bam_counts = SNPCalling.freebayes_vcf_bam_count,
filters = filters,
max_cores = max_cores
max_cores = max_cores,
replaceADbyMissing = replaceADbyMissing,
ploidy = ploidy
}

output {
Expand Down
Loading

0 comments on commit 05f5913

Please sign in to comment.