diff --git a/.circleci/config.yml b/.circleci/config.yml
index 47d815f3..e01a76b2 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -24,7 +24,9 @@ jobs:
- checkout
- run: pip3 install -r tests/requirements.txt
- - run: pytest --git-aware tests/subworkflows/genotyping_empirical/freebayes/polyrad
+ - run: pytest --git-aware tests/subworkflows/gatk_genotyping/
+ - run: pytest --git-aware tests/subworkflows/stacks_genotyping/
+ - run: pytest --git-aware tests/subworkflows/freebayes_genotyping/
release-to-github:
<<: *machine_default
diff --git a/.configurations/cromwell_mysql.conf b/.configurations/cromwell_mysql.conf
index 9ada24b5..f90277b1 100644
--- a/.configurations/cromwell_mysql.conf
+++ b/.configurations/cromwell_mysql.conf
@@ -1,5 +1,5 @@
backend {
- default = SlurmSingularity
+ default = Local
providers {
diff --git a/.dockerfiles/java-in-the-cloud/Dockerfile b/.dockerfiles/java-in-the-cloud/Dockerfile
index abf78727..0227c496 100644
--- a/.dockerfiles/java-in-the-cloud/Dockerfile
+++ b/.dockerfiles/java-in-the-cloud/Dockerfile
@@ -1,4 +1,4 @@
-FROM openjdk:7
+FROM ibmjava:jre
COPY ./PedigreeSim.jar /usr/jars/
@@ -10,3 +10,10 @@ RUN apt-get update \
&& make \
&& mv vcf2diploid.jar /usr/jars/
+
+RUN wget https://bitbucket.org/tasseladmin/tassel-5-standalone/get/5f68583d0f56.zip \
+ && unzip 5f68583d0f56.zip \
+ && mkdir /usr/tassel \
+ && mv tasseladmin-tassel-5-standalone-5f68583d0f56/* /usr/tassel
+
+RUN apt-get install -y samtools
\ No newline at end of file
diff --git a/.dockerfiles/tassel/Dockerfile b/.dockerfiles/tassel/Dockerfile
new file mode 100644
index 00000000..dc6ce35d
--- /dev/null
+++ b/.dockerfiles/tassel/Dockerfile
@@ -0,0 +1,11 @@
+FROM ibmjava:jre
+
+RUN apt update \
+ && apt install -y parallel vcftools
+
+RUN conda install -y -c bioconda freebayes \
+ && conda install -y -c bioconda vcflib
+
+RUN wget https://bitbucket.org/tasseladmin/tassel-5-standalone/get/5f68583d0f56.zip \
+ && unzip 5f68583d0f56.zip \
+ && mv tasseladmin-tassel-5-standalone-5f68583d0f56/ /tassel
diff --git a/.scripts/build_pipeline_release.sh b/.scripts/build_pipeline_release.sh
index b94e011a..4b00a1e2 100755
--- a/.scripts/build_pipeline_release.sh
+++ b/.scripts/build_pipeline_release.sh
@@ -6,14 +6,16 @@ declare -r SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 &
source ${SCRIPT_DIR}/common.sh
declare ROOT_WDL=""
+declare ROOT_JSON=""
declare VERSION=""
declare OUTPUT_DIR=""
declare ENV=""
-declare -r ZIP_SUFFIX=".zip" WDL_SUFFIX=".wdl" OPTIONS_SUFFIX=".options.json"
+declare -r ZIP_SUFFIX=".zip" WDL_SUFFIX=".wdl" OPTIONS_SUFFIX=".inputs.json"
function make_release() {
local -r rootWdl=${ROOT_WDL}
+ local -r rootJSON=${ROOT_JSON}
local -r wdlBasename=$(basename ${rootWdl} ${WDL_SUFFIX})
# Store the current working directory
@@ -37,7 +39,8 @@ function make_release() {
# Strip the paths out of the root WDL imports
sed -E '/http/! s/import "(.*)\/(.*\'${WDL_SUFFIX}')"/import "\2"/g' ${rootWdl} > ${outputVersionedPrefix}${WDL_SUFFIX}
- write_options ${rootWdl} ${outputVersionedPrefix}
+ sed -E '/http/! s/import "(.*)\/(.*\'${OPTIONS_SUFFIX}')"/import "\2"/g' ${rootJSON} > ${outputVersionedPrefix}${OPTIONS_SUFFIX}
+ # write_options ${rootWdl} ${outputVersionedPrefix}
versioned_dependencies_zip=${outputVersionedPrefix}${ZIP_SUFFIX}
@@ -91,6 +94,7 @@ function show_help() {
echo ""
echo "Arguments:"
echo " -w The path to the workflow (.wdl) file"
+ echo " -j The path to the JSON input file template (.json) file"
echo " -v The version of the workflow (used in building the release name)"
echo " -o The directory into which the outputs will be written"
echo " -e The environment (dev, staging, or prod)"
@@ -98,7 +102,7 @@ function show_help() {
echo ""
}
-while getopts "hw:v:o:e:" opt; do
+while getopts "hw:j:v:o:e:" opt; do
case ${opt} in
h)
show_help
@@ -111,6 +115,13 @@ while getopts "hw:v:o:e:" opt; do
fi
ROOT_WDL=${OPTARG}
;;
+ j)
+ if [[ ! -f ${OPTARG} ]]; then
+ echo >&2 Error: ${OPTARG} does not exist!
+ exit 1
+ fi
+ ROOT_JSON=${OPTARG}
+ ;;
v)
VERSION=${OPTARG}
;;
diff --git a/.scripts/release_pipeline_to_github.sh b/.scripts/release_pipeline_to_github.sh
index f01b338d..80c5b05b 100755
--- a/.scripts/release_pipeline_to_github.sh
+++ b/.scripts/release_pipeline_to_github.sh
@@ -103,9 +103,10 @@ function build_and_release_to_github() {
local -r prerelease=${5}
local -r pipelineName=$(basename ${pipeline} .wdl)
local -r changelog=$(dirname ${pipeline})/${pipelineName}.changelog.md
+ local -r inputsName=$(dirname ${pipeline})/${pipelineName}.inputs.json
stderr "Building artifacts for ${releaseName}"
- ${SCRIPT_DIR}/build_pipeline_release.sh -w ${pipeline} -e prod -v ${version} -o ${localReleaseDir}
+ ${SCRIPT_DIR}/build_pipeline_release.sh -w ${pipeline} -j ${inputsName} -e prod -v ${version} -o ${localReleaseDir}
stderr "Building release notes for ${releaseName}"
local previousEntryStart
@@ -197,8 +198,8 @@ function upload_to_github_as_draft() {
${pipelineName} \
${version} \
${releaseName} \
- "options.json" \
- "application/json"
+ "inputs.json" \
+ "text/json"
local -r dependenciesZip=${localReleaseDir}/${pipelineName}/${pipelineName}_${version}.zip
@@ -308,4 +309,4 @@ localReleaseDir=$(mktemp -d)
trap cleanup_failed_release EXIT
-release_to_github ${PIPELINE_TO_RELEASE} ${ENV}
+release_to_github ${PIPELINE_TO_RELEASE} ${ENV}
\ No newline at end of file
diff --git a/README.md b/README.md
index d1435746..e384d086 100644
--- a/README.md
+++ b/README.md
@@ -1,23 +1,32 @@
[![Development](https://img.shields.io/badge/development-active-blue.svg)](https://img.shields.io/badge/development-active-blue.svg)
[![Reads2Map](https://circleci.com/gh/Cristianetaniguti/Reads2Map.svg?style=svg)](https://app.circleci.com/pipelines/github/Cristianetaniguti/Reads2Map)
-## Reads2Map
-Reads2Map presents a collection of [WDL workflows](https://openwdl.org/) to build linkage maps from sequencing reads. Each workflow release is described in the [Read2Map releases page](https://github.com/Cristianetaniguti/Reads2Map/releases).
+
-The main workflows are the `EmpiricalReads2Map.wdl` and the `SimulatedReads2Map.wdl`. The `EmpiricalReads2Map.wdl` is composed by the `EmpiricalSNPCalling.wdl` that performs the SNP calling, and the `EmpiricalMaps.wdl` that performs the genotype calling and map building in empirical reads. The `SimulatedReads2Map.wdl` simulates Illumina reads for RADseq, exome, or WGS data and performs the SNP and genotype calling and genetic map building.
+Reads2Map is a collection of [WDL workflows](https://openwdl.org/) designed to facilitate the construction of linkage maps from sequencing reads. You can find details of each workflow release on the [Reads2Map releases page](https://github.com/Cristianetaniguti/Reads2Map/releases).
-By now, [GATK](https://github.com/broadinstitute/gatk), [Freebayes](https://github.com/ekg/freebayes) are included for SNP calling; [updog](https://github.com/dcgerard/updog), [polyRAD](https://github.com/lvclark/polyRAD), [SuperMASSA](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0030906) for dosage calling; and [OneMap](https://github.com/augusto-garcia/onemap), and [GUSMap](https://github.com/tpbilton/GUSMap) for linkage map build.
+The main workflows are `EmpiricalReads2Map.wdl` and `SimulatedReads2Map.wdl`. `EmpiricalReads2Map.wdl` is composed of `EmpiricalSNPCalling.wdl`, which performs the SNP calling, and `EmpiricalMaps.wdl`, which performs the genotype calling and map building from empirical reads. `SimulatedReads2Map.wdl` uses the RADinitio software to simulate Illumina reads for RADseq, exome, or WGS data and then performs the SNP calling, genotype calling, and genetic map building.
-![math_meth2](https://user-images.githubusercontent.com/7572527/203172239-e4d2d857-84e2-48c5-bb88-01052a287004.png)
+The SNP calling step in Reads2Map currently supports the popular tools GATK, Freebayes, TASSEL, and Stacks. For genotype/dosage calling, the workflows use updog, polyRAD, and SuperMASSA. Finally, Reads2Map leverages OneMap, GUSMap, and MAPpoly for linkage map construction.
+
+For diploid data, you can visualize the results using the Reads2MapApp R package and Shiny app, available [here](https://github.com/Cristianetaniguti/Reads2MapApp). This package supports the visualization of linkage maps built with OneMap and GUSMap.
+
+The Reads2Map workflows perform the SNP and genotype/dosage calling for your complete data set. However, they build the linkage map only for a single chromosome (a reference genome is required) for each combination of software and parameters. The resulting maps will probably still require improvements, but their characteristics will indicate which combination of SNP and genotype calling software and parameters you should use for your data. Once a pipeline is selected, you can load the respective VCF file in R and build the complete linkage map using OneMap or MAPpoly, as sketched below. See the [OneMap](https://statgen-esalq.github.io/tutorials/onemap/Outcrossing_Populations.html) or [MAPpoly](https://rpubs.com/mmollin/tetra_mappoly_vignette) tutorials for guidance on building and improving the linkage map for the complete dataset.
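+
+A minimal sketch of this last step for a diploid outcrossing population (the VCF and parent names below are placeholders, not files produced by Reads2Map under these exact names; adapt them to the pipeline you selected):
+
+```bash
+# Load the selected VCF into OneMap and start the complete map construction.
+R --vanilla --no-save <<'RSCRIPT'
+  library(onemap)
+  onemap_obj <- onemap_read_vcfR(vcf = "selected_pipeline.vcf.gz",
+                                 parent1 = "P1", parent2 = "P2",
+                                 cross = "outcross")
+  # from here, follow the OneMap tutorial to filter, group, and order the markers
+RSCRIPT
+```
+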
## How to use
-Multiple systems are available to run WDL workflows such as Cromwell, miniWDL, and dxWDL. See further information in the [openwdl documentation](https://github.com/openwdl/wdl#execution-engines).
+Multiple systems are available to run WDL workflows such as Cromwell, miniWDL, and dxWDL. See further information in the [openwdl documentation](https://github.com/openwdl/wdl#execution-engines).
-To run a pipeline, first navigate to [Reads2Map releases page](https://github.com/Cristianetaniguti/Reads2Map/releases), search for the pipeline tag you which to run, and download the pipeline’s assets (the WDL workflow, the JSON, and the ZIP with accompanying dependencies).
+In addition, we suggest two wrappers: [cromwell-cli](https://github.com/lmtani/cromwell-cli) and [Caper](https://github.com/ENCODE-DCC/caper). Here is a tutorial on how to set up these tools, with an example of running EmpiricalReads2Map:
-## Documentation
+* [Setup and run Reads2Map workflows](https://cristianetaniguti.github.io/Tutorials/Reads2Map/Setup_and_run_Reads2Map_workflows.html)
+
+To run a pipeline, first navigate to the [Reads2Map releases page](https://github.com/Cristianetaniguti/Reads2Map/releases), search for the pipeline tag you wish to run, and download the pipeline’s assets (the WDL workflow, the JSON input template, and the ZIP with accompanying dependencies).
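+
+As a rough sketch, after filling the JSON input template with your values, a local run with Cromwell could look like the example below (artifact names are illustrative; use the file names attached to the release you downloaded):
+
+```bash
+# Run the released workflow with its filled-in inputs template and dependencies ZIP.
+java -jar cromwell.jar run EmpiricalReads2Map.wdl \
+     --inputs EmpiricalReads2Map.inputs.json \
+     --imports EmpiricalReads2Map.zip
+```
+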
Check the description of the inputs for the pipelines:
@@ -25,14 +34,10 @@ Check the description of the inputs for the pipelines:
* [SimulatedReads2Map](https://cristianetaniguti.github.io/Tutorials/Reads2Map/simulatedreads.html)
-Check how to evaluate the workflows results in Reads2MapApp Shiny:
+Check how to evaluate the workflow results in the Reads2MapApp Shiny app (so far only available for diploid datasets):
* [Reads2MapApp](https://github.com/Cristianetaniguti/Reads2MapApp)
-Once you selected the best pipeline using a subset of your data, you can build a complete high-density linkage map:
-
-* [A Guide to Build High-Density Linkage Maps](https://cristianetaniguti.github.io/Tutorials/onemap/Quick_HighDens/High_density_maps.html)
-
Check more information and examples of usage in:
* [Taniguti, C. H., Taniguti, L. M., Amadeu, R. R., Mollinari, M., Da, G., Pereira, S., Riera-Lizarazu, O., Lau, J., Byrne, D., de Siqueira Gesteira, G., De, T., Oliveira, P., Ferreira, G. C., & Franco Garcia, A. A. Developing best practices for genotyping-by-sequencing analysis using linkage maps as benchmarks. BioRxiv. https://doi.org/10.1101/2022.11.24.517847](https://www.biorxiv.org/content/10.1101/2022.11.24.517847v2)
@@ -44,6 +49,8 @@ Check more information and examples of usage in:
- [ddRADseqTools](https://github.com/GGFHF/ddRADseqTools) in [cristaniguti/ pirs-ddrad-cutadapt:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/pirs-ddrad-cutadapt): Set of applications useful to in silico design and testing of double digest RADseq (ddRADseq) experiments;
- [Freebayes](https://github.com/ekg/freebayes) in [Cristaniguti/freebayes:0.0.1](): Variant call step;
- [GATK](https://github.com/broadinstitute/gatk) in [us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z](https://console.cloud.google.com/gcr/images/broad-gotc-prod/US/genomes-in-the-cloud): Variant call step using Haplotype Caller, GenomicsDBImport and GenotypeGVCFs;
+- [TASSEL](https://www.maizegenetics.net/tassel) in [cristaniguti/java-in-the-cloud:0.0.2](https://hub.docker.com/repository/docker/cristaniguti/java-in-the-cloud/general): Variant call step;
+- [Stacks](https://catchenlab.life.illinois.edu/stacks/) in [cristaniguti/stacks:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/stacks/general): Variant call step;
- [PedigreeSim](https://github.com/PBR/pedigreeSim?files=1) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Simulates progeny genotypes from parents genotypes for different types of populations;
- [picard](https://github.com/broadinstitute/picard) in [us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z](https://console.cloud.google.com/gcr/images/broad-gotc-prod/US/genomes-in-the-cloud): Process alignment files;
- [pirs](https://github.com/galaxy001/pirs) in [cristaniguti/ pirs-ddrad-cutadapt:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/pirs-ddrad-cutadapt): To generate simulates paired-end reads from a reference genome;
@@ -63,4 +70,9 @@ Check more information and examples of usage in:
- [updog](https://github.com/dcgerard/updog) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Flexible Genotyping of Polyploids using Next Generation Sequencing Data
- [polyRAD](https://github.com/lvclark/polyRAD) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Genotype Calling with Uncertainty from Sequencing Data in Polyploids
- [Reads2MapApp](https://github.com/Cristianetaniguti/Reads2MapApp) in [cristaniguti/reads2mapApp:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Shiny app to evaluate Reads2Map workflows results
-- [simuscopR](https://github.com/Cristianetaniguti/simuscopR) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Wrap-up R package for SimusCop simulations
\ No newline at end of file
+- [simuscopR](https://github.com/Cristianetaniguti/simuscopR) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Wrap-up R package for SimusCop simulations
+- [MAPpoly](https://github.com/mmollina/MAPpoly) in [cristaniguti/reads2map:0.0.5](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Builds linkage maps for autopolyploid species
+
+### Funding
+
+This work was partially supported by the National Council for Scientific and Technological Development (CNPq - 313269/2021-1); by USDA, National Institute of Food and Agriculture (NIFA), Specialty Crop Research Initiative (SCRI) project “Tools for Genomics Assisted Breeding in Polyploids: Development of a Community Resource” (Award No. 2020-51181-32156); and by the Bill and Melinda Gates Foundation (OPP1213329) project SweetGAINS.
\ No newline at end of file
diff --git a/pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md b/pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md
index 05d8587f..7460fc67 100644
--- a/pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md
+++ b/pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md
@@ -1,3 +1,9 @@
+# 1.2.4
+
+* runtimes adapted to run with Caper
+* genotype calling with updog, SuperMASSA, and polyRAD is now performed on the complete data set (not only on the selected chromosome)
+* [new tutorial](https://cristianetaniguti.github.io/Tutorials/Reads2Map/Setup_and_run_Reads2Map_workflows.html)
+
# 1.2.3
* Supermassa has smaller probability threshold (bugfix)
diff --git a/pipelines/EmpiricalMaps/EmpiricalMaps.inputs.json b/pipelines/EmpiricalMaps/EmpiricalMaps.inputs.json
new file mode 100644
index 00000000..c18ba567
--- /dev/null
+++ b/pipelines/EmpiricalMaps/EmpiricalMaps.inputs.json
@@ -0,0 +1,23 @@
+{
+ "Maps.dataset": {
+ "parent2": "String",
+ "name": "String",
+ "parent1": "String",
+ "chromosome": "String",
+ "cross": "String",
+ "multiallelics": "Boolean"
+ },
+ "Maps.max_cores": "Int",
+ "Maps.gatk_vcf_multi": "File? (optional)",
+ "Maps.gatk_mchap": "String",
+ "Maps.vcfs_counts_source": "Array[String]",
+ "Maps.filters": "String? (optional)",
+ "Maps.filt_segr": "String? (optional)",
+ "Maps.prob_thres": "Float? (optional)",
+ "Maps.ploidy": "Int",
+ "Maps.vcfs_software": "Array[String]",
+ "Maps.filter_noninfo": "Boolean",
+ "Maps.vcfs": "Array[File]",
+ "Maps.replaceADbyMissing": "String"
+}
+
diff --git a/pipelines/EmpiricalMaps/EmpiricalMaps.wdl b/pipelines/EmpiricalMaps/EmpiricalMaps.wdl
index 6a90acdb..63630d34 100644
--- a/pipelines/EmpiricalMaps/EmpiricalMaps.wdl
+++ b/pipelines/EmpiricalMaps/EmpiricalMaps.wdl
@@ -20,6 +20,10 @@ workflow Maps {
Array[File] vcfs
Array[String] vcfs_software
Array[String] vcfs_counts_source
+ Boolean run_updog = true
+ Boolean run_supermassa = false
+ Boolean run_polyrad = true
+ Boolean run_gusmap = false
Boolean filter_noninfo
String replaceADbyMissing
File? gatk_vcf_multi
@@ -35,10 +39,10 @@ workflow Maps {
call utils.ApplyRandomFiltersArray {
input:
vcfs = vcfs,
- vcfs_software = vcfs_software,
- vcfs_counts_source = vcfs_counts_source,
- filters = filters,
- chromosome = dataset.chromosome
+ vcfs_SNPCall_software = vcfs_software,
+ vcfs_Counts_source = vcfs_counts_source,
+ vcfs_GenoCall_software = range(length(vcfs_software)),
+ filters = filters
}
}
@@ -66,64 +70,71 @@ workflow Maps {
File vcf_up = select_first([RemoveNonInformative.vcf_filtered, splitgeno.biallelics])
if(ploidy == 2) {
- call genotyping.onemapMapsEmp as updogMaps {
- input:
- vcf_file = vcf_up,
- SNPCall_program = vcfs_software[idx],
- GenotypeCall_program = "updog",
- CountsFrom = vcfs_counts_source[idx],
- cross = dataset.cross,
- parent1 = dataset.parent1,
- parent2 = dataset.parent2,
- chromosome = dataset.chromosome,
- multiallelics = dataset.multiallelics,
- multiallelics_file = splitgeno.multiallelics,
- max_cores = max_cores,
- ploidy = ploidy
+ if(run_updog){
+ call genotyping.onemapMapsEmp as updogMaps {
+ input:
+ vcf_file = vcf_up,
+ SNPCall_program = vcfs_software[idx],
+ GenotypeCall_program = "updog",
+ CountsFrom = vcfs_counts_source[idx],
+ cross = dataset.cross,
+ parent1 = dataset.parent1,
+ parent2 = dataset.parent2,
+ chromosome = dataset.chromosome,
+ multiallelics = dataset.multiallelics,
+ multiallelics_file = splitgeno.multiallelics,
+ max_cores = max_cores,
+ ploidy = ploidy
+ }
}
- call genotyping.onemapMapsEmp as supermassaMaps {
- input:
- vcf_file = vcf_up,
- SNPCall_program = vcfs_software[idx],
- GenotypeCall_program = "supermassa",
- CountsFrom = vcfs_counts_source[idx],
- cross = dataset.cross,
- parent1 = dataset.parent1,
- parent2 = dataset.parent2,
- chromosome = dataset.chromosome,
- multiallelics = dataset.multiallelics,
- multiallelics_file = splitgeno.multiallelics,
- max_cores = max_cores,
- ploidy = ploidy
+ if(run_supermassa){
+ call genotyping.onemapMapsEmp as supermassaMaps {
+ input:
+ vcf_file = vcf_up,
+ SNPCall_program = vcfs_software[idx],
+ GenotypeCall_program = "supermassa",
+ CountsFrom = vcfs_counts_source[idx],
+ cross = dataset.cross,
+ parent1 = dataset.parent1,
+ parent2 = dataset.parent2,
+ chromosome = dataset.chromosome,
+ multiallelics = dataset.multiallelics,
+ multiallelics_file = splitgeno.multiallelics,
+ max_cores = max_cores,
+ ploidy = ploidy
+ }
}
- call genotyping.onemapMapsEmp as polyradMaps {
- input:
- vcf_file = vcf_up,
- SNPCall_program = vcfs_software[idx],
- GenotypeCall_program = "polyrad",
- CountsFrom = vcfs_counts_source[idx],
- cross = dataset.cross,
- parent1 = dataset.parent1,
- parent2 = dataset.parent2,
- chromosome = dataset.chromosome,
- multiallelics = dataset.multiallelics,
- multiallelics_file = splitgeno.multiallelics,
- max_cores = max_cores,
- ploidy = ploidy
+ if(run_polyrad){
+ call genotyping.onemapMapsEmp as polyradMaps {
+ input:
+ vcf_file = vcf_up,
+ SNPCall_program = vcfs_software[idx],
+ GenotypeCall_program = "polyrad",
+ CountsFrom = vcfs_counts_source[idx],
+ cross = dataset.cross,
+ parent1 = dataset.parent1,
+ parent2 = dataset.parent2,
+ chromosome = dataset.chromosome,
+ multiallelics = dataset.multiallelics,
+ multiallelics_file = splitgeno.multiallelics,
+ max_cores = max_cores,
+ ploidy = ploidy
+ }
}
- # Build maps with GUSMap
- call gusmap.gusmapMapsEmp {
- input:
- vcf_file = vcf_up,
- SNPCall_program = vcfs_software[idx],
- CountsFrom = vcfs_counts_source[idx],
- GenotypeCall_program = "gusmap",
- parent1 = dataset.parent1,
- parent2 = dataset.parent2,
- max_cores = max_cores
+ if(run_gusmap){
+ call gusmap.gusmapMapsEmp {
+ input:
+ vcf_file = vcf_up,
+ SNPCall_program = vcfs_software[idx],
+ CountsFrom = vcfs_counts_source[idx],
+ GenotypeCall_program = "gusmap",
+ parent1 = dataset.parent1,
+ parent2 = dataset.parent2,
+ max_cores = max_cores
+ }
}
if(vcfs_counts_source[idx] != "bam"){
@@ -147,52 +158,58 @@ workflow Maps {
}
if(ploidy > 2){
- call mappoly_sub.MappolyMapsEmp as updogPolyMaps {
- input:
- vcf_file = vcf_up,
- SNPCall_program = vcfs_software[idx],
- GenotypeCall_program = "updog",
- CountsFrom = vcfs_counts_source[idx],
- cross = "F1",
- parent1 = dataset.parent1,
- parent2 = dataset.parent2,
- max_cores = max_cores,
- ploidy = ploidy,
- prob_thres = prob_thres,
- filt_segr = filt_segr
+ if(run_updog){
+ call mappoly_sub.MappolyMapsEmp as updogPolyMaps {
+ input:
+ vcf_file = vcf_up,
+ SNPCall_program = vcfs_software[idx],
+ GenotypeCall_program = "updog",
+ CountsFrom = vcfs_counts_source[idx],
+ cross = "F1",
+ parent1 = dataset.parent1,
+ parent2 = dataset.parent2,
+ max_cores = max_cores,
+ ploidy = ploidy,
+ prob_thres = prob_thres,
+ filt_segr = filt_segr
+ }
}
- call mappoly_sub.MappolyMapsEmp as polyradPolyMaps {
- input:
- vcf_file = vcf_up,
- SNPCall_program = vcfs_software[idx],
- GenotypeCall_program = "polyrad",
- CountsFrom = vcfs_counts_source[idx],
- cross = "F1",
- parent1 = dataset.parent1,
- parent2 = dataset.parent2,
- max_cores = max_cores,
- ploidy = ploidy,
- prob_thres = prob_thres,
- filt_segr = filt_segr
+ if(run_polyrad){
+ call mappoly_sub.MappolyMapsEmp as polyradPolyMaps {
+ input:
+ vcf_file = vcf_up,
+ SNPCall_program = vcfs_software[idx],
+ GenotypeCall_program = "polyrad",
+ CountsFrom = vcfs_counts_source[idx],
+ cross = "F1",
+ parent1 = dataset.parent1,
+ parent2 = dataset.parent2,
+ max_cores = max_cores,
+ ploidy = ploidy,
+ prob_thres = prob_thres,
+ filt_segr = filt_segr
+ }
}
-
- call mappoly_sub.MappolyMapsEmp as supermassaPolyMaps {
- input:
- vcf_file = vcf_up,
- SNPCall_program = vcfs_software[idx],
- GenotypeCall_program = "supermassa",
- CountsFrom = vcfs_counts_source[idx],
- cross = "F1",
- parent1 = dataset.parent1,
- parent2 = dataset.parent2,
- max_cores = max_cores,
- ploidy = ploidy,
- prob_thres = prob_thres,
- filt_segr = filt_segr
+
+ if(run_supermassa){
+ call mappoly_sub.MappolyMapsEmp as supermassaPolyMaps {
+ input:
+ vcf_file = vcf_up,
+ SNPCall_program = vcfs_software[idx],
+ GenotypeCall_program = "supermassa",
+ CountsFrom = vcfs_counts_source[idx],
+ cross = "F1",
+ parent1 = dataset.parent1,
+ parent2 = dataset.parent2,
+ max_cores = max_cores,
+ ploidy = ploidy,
+ prob_thres = prob_thres,
+ filt_segr = filt_segr
+ }
}
- if(vcfs_counts_source[idx] != "bam"){
+ if(vcfs_counts_source[idx] != "bam" && vcfs_software[idx] != "stacks" && vcfs_software[idx] != "tassel"){
call mappoly_task.MappolyReport {
input:
vcf_file = vcf_up,
@@ -210,35 +227,51 @@ workflow Maps {
}
}
- if(ploidy == 2){
- Array[File] snpcaller_results = select_all(SNPCallerMapsEmp.tar_gz_report)
-
- # Compress files
- call reports.JointReports {
- input:
- SNPCaller = snpcaller_results,
- updog = updogMaps.tar_gz_report,
- polyrad = polyradMaps.tar_gz_report,
- supermassa = supermassaMaps.tar_gz_report,
- gusmap = gusmapMapsEmp.tar_gz_report,
- max_cores = max_cores
- }
+    if(defined(SNPCallerMapsEmp.tar_gz_report)) {
+ Array[File] snpcaller_results = select_all(SNPCallerMapsEmp.tar_gz_report)
}
-
- if(ploidy > 2){
-
- call reports.JointReportsPoly {
- input:
- SNPCaller = select_all(MappolyReport.results),
- updog = select_all(updogPolyMaps.tar_gz_report),
- polyrad = select_all(polyradPolyMaps.tar_gz_report),
- supermassa = select_all(supermassaPolyMaps.tar_gz_report)
- }
+ if(defined(updogMaps.tar_gz_report)) {
+ Array[File] updog_results = select_all(updogMaps.tar_gz_report)
+ }
+ if(defined(supermassaMaps.tar_gz_report)){
+ Array[File] supermassa_results = select_all(supermassaMaps.tar_gz_report)
+ }
+ if(defined(polyradMaps.tar_gz_report)){
+ Array[File] polyrad_results = select_all(polyradMaps.tar_gz_report)
+ }
+ if(defined(gusmapMapsEmp.tar_gz_report)){
+ Array[File] gusmap_results = select_all(gusmapMapsEmp.tar_gz_report)
+ }
+ if(defined(MappolyReport.results)){
+ Array[File] snpcaller_poly_results = select_all(MappolyReport.results)
+ }
+ if(defined(updogPolyMaps.tar_gz_report)){
+ Array[File] updog_poly_results = select_all(updogPolyMaps.tar_gz_report)
+ }
+ if(defined(polyradPolyMaps.tar_gz_report)){
+ Array[File] polyrad_poly_results = select_all(polyradPolyMaps.tar_gz_report)
+ }
+ if(defined(supermassaPolyMaps.tar_gz_report)){
+ Array[File] supermassa_poly_results = select_all(supermassaPolyMaps.tar_gz_report)
+ }
+
+ # Compress files
+ call reports.JointAllReports {
+ input:
+ SNPCallerMapsEmp = snpcaller_results,
+ updogMaps = updog_results,
+ polyradMaps = polyrad_results,
+ supermassaMaps = supermassa_results,
+ gusmapMapsEmp = gusmap_results,
+ SNPCallerPolyMapsEmp = snpcaller_poly_results,
+ updogPolyMaps = updog_poly_results,
+ polyradPolyMaps = polyrad_poly_results,
+ supermassaPolyMaps = supermassa_poly_results,
+ max_cores = max_cores,
+ ploidy = ploidy
}
-
- File Empirical_results_sele = select_first([JointReports.EmpiricalReads_results, JointReportsPoly.EmpiricalReads_results])
output {
- File EmpiricalReads_results = Empirical_results_sele
+ File EmpiricalReads_results = JointAllReports.EmpiricalReads_results
}
}
diff --git a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md
index 06756f9a..f85a429f 100644
--- a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md
+++ b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md
@@ -1,3 +1,24 @@
+# 1.4.0
+
+* Stacks included
+* support for paired-end reads
+* default values defined for inputs
+* runtimes adapted to run with Caper
+* [new tutorial](https://cristianetaniguti.github.io/Tutorials/Reads2Map/Setup_and_run_Reads2Map_workflows.html)
+
+# 1.3.0
+
+* TASSEL 5.0 included
+* releases include the input files template
+* new parameter to control the maximum RAM memory used by GATK and TASSEL
+
+# 1.2.0
+
+* Add MAPpoly to build linkage maps for polyploid species
+* Run Freebayes parallelized across nodes by chromosome and across cores by genomic region
+* Adjust runtimes
+* Add polyploid dataset for tests
+
# 1.0.0
Initial release
diff --git a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.inputs.json b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.inputs.json
new file mode 100644
index 00000000..9945e780
--- /dev/null
+++ b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.inputs.json
@@ -0,0 +1,49 @@
+{
+ "EmpiricalReads.replaceAD": "Boolean (optional, default = true)",
+ "EmpiricalReads.filters": "String? (optional)",
+ "EmpiricalReads.Maps.run_updog": "Boolean (optional, default = true)",
+ "EmpiricalReads.filt_segr": "String? (optional)",
+ "EmpiricalReads.max_ram": "Int",
+ "EmpiricalReads.replaceADbyMissing": "String (optional, default = \"TRUE\")",
+ "EmpiricalReads.Maps.run_supermassa": "Boolean (optional, default = false)",
+ "EmpiricalReads.run_tassel": "Boolean (optional, default = true)",
+ "EmpiricalReads.chunk_size": "Int",
+ "EmpiricalReads.Maps.run_gusmap": "Boolean (optional, default = false)",
+ "EmpiricalReads.gatk_mchap": "Boolean (optional, default = false)",
+ "EmpiricalReads.rm_dupli": "Boolean (optional, default = false)",
+ "EmpiricalReads.SNPCalling.max_ram": "Int",
+ "EmpiricalReads.dataset": {
+ "parent2": "String",
+ "name": "String",
+ "parent1": "String",
+ "chromosome": "String",
+ "cross": "String",
+ "multiallelics": "Boolean"
+ },
+ "EmpiricalReads.ploidy": "Int (optional, default = 2)",
+ "EmpiricalReads.n_chrom": "Int",
+ "EmpiricalReads.SNPCalling.chunk_size": "Int",
+ "EmpiricalReads.prob_thres": "Float? (optional)",
+ "EmpiricalReads.hardfilters": "Boolean (optional, default = true)",
+ "EmpiricalReads.run_freebayes": "Boolean (optional, default = true)",
+ "EmpiricalReads.enzyme": "String? (optional)",
+ "EmpiricalReads.max_cores": "Int",
+ "EmpiricalReads.references": {
+ "ref_fasta": "File",
+ "ref_dict": "File",
+ "ref_ann": "File",
+ "ref_sa": "File",
+ "ref_amb": "File",
+ "ref_pac": "File",
+ "ref_bwt": "File",
+ "ref_fasta_index": "File"
+ },
+ "EmpiricalReads.Maps.run_polyrad": "Boolean (optional, default = true)",
+ "EmpiricalReads.SNPCalling.run_stacks": "Boolean (optional, default = true)",
+ "EmpiricalReads.Maps.filter_noninfo": "Boolean",
+ "EmpiricalReads.SNPCalling.pop_map": "File? (optional)",
+ "EmpiricalReads.samples_info": "File",
+ "EmpiricalReads.pair_end": "Boolean (optional, default = false)",
+ "EmpiricalReads.run_gatk": "Boolean (optional, default = true)"
+}
+
diff --git a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.wdl b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.wdl
index dd3f408c..20f07cc4 100644
--- a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.wdl
+++ b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.wdl
@@ -13,6 +13,7 @@ workflow EmpiricalReads {
ReferenceFasta references
Dataset dataset
Int max_cores
+ Int max_ram
Int chunk_size
Boolean rm_dupli = false
Boolean gatk_mchap = false
@@ -21,6 +22,9 @@ workflow EmpiricalReads {
String replaceADbyMissing = "TRUE" # Boolean inside R
Boolean run_gatk = true
Boolean run_freebayes = true
+ Boolean run_tassel = true
+ Boolean pair_end = false
+ String? enzyme
Int ploidy = 2
Int n_chrom
String? filters
@@ -41,8 +45,11 @@ workflow EmpiricalReads {
replaceAD = replaceAD,
run_gatk = run_gatk,
run_freebayes = run_freebayes,
+ run_tassel = run_tassel,
ploidy = ploidy,
- n_chrom = n_chrom
+ n_chrom = n_chrom,
+ enzyme = enzyme,
+ pair_end = pair_end
}
call maps.Maps {
diff --git a/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.changelog.md b/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.changelog.md
index a039a9c8..f51a21ca 100644
--- a/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.changelog.md
+++ b/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.changelog.md
@@ -1,3 +1,17 @@
+# 1.4.0
+
+* Stacks included
+* support for paired-end reads
+* default values defined for inputs
+* runtimes adapted to run with Caper
+* [new tutorial](https://cristianetaniguti.github.io/Tutorials/Reads2Map/Setup_and_run_Reads2Map_workflows.html)
+
+# 1.3.0
+
+* TASSEL 5.0 included
+* releases include the input files template
+* new parameter to control the maximum RAM memory used by GATK and TASSEL
+
# 1.2.1
* Adjustments in runtime
diff --git a/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.inputs.json b/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.inputs.json
new file mode 100644
index 00000000..2688353a
--- /dev/null
+++ b/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.inputs.json
@@ -0,0 +1,36 @@
+{
+ "SNPCalling.enzyme": "String? (optional)",
+ "SNPCalling.P1": "String? (optional)",
+ "SNPCalling.max_cores": "Int",
+ "SNPCalling.pair_end": "Boolean (optional, default = false)",
+ "SNPCalling.ploidy": "Int",
+ "SNPCalling.rm_dupli": "Boolean (optional, default = false)",
+ "SNPCalling.GatkGenotyping.vcf_simu": "File? (optional)",
+ "SNPCalling.replaceAD": "Boolean (optional, default = false)",
+ "SNPCalling.run_gatk": "Boolean (optional, default = true)",
+ "SNPCalling.pop_map": "File? (optional)",
+ "SNPCalling.run_tassel": "Boolean (optional, default = true)",
+ "SNPCalling.n_chrom": "Int",
+ "SNPCalling.chunk_size": "Int",
+ "SNPCalling.GatkGenotyping.depth": "Int? (optional)",
+ "SNPCalling.samples_info": "File",
+ "SNPCalling.P2": "String? (optional)",
+ "SNPCalling.max_ram": "Int",
+ "SNPCalling.FreebayesGenotyping.vcf_simu": "File? (optional)",
+ "SNPCalling.gatk_mchap": "Boolean (optional, default = false)",
+ "SNPCalling.references": {
+ "ref_fasta": "File",
+ "ref_dict": "File",
+ "ref_ann": "File",
+ "ref_sa": "File",
+ "ref_amb": "File",
+ "ref_pac": "File",
+ "ref_bwt": "File",
+ "ref_fasta_index": "File"
+ },
+ "SNPCalling.run_freebayes": "Boolean (optional, default = false)",
+ "SNPCalling.GatkGenotyping.seed": "Int? (optional)",
+ "SNPCalling.run_stacks": "Boolean (optional, default = true)",
+ "SNPCalling.hardfilters": "Boolean (optional, default = true)"
+}
+
diff --git a/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.wdl b/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.wdl
index 30a723a4..a501579a 100644
--- a/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.wdl
+++ b/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.wdl
@@ -5,6 +5,8 @@ import "../../structs/dna_seq_structs.wdl"
import "../../subworkflows/create_alignment_from_families_files.wdl" as fam
import "../../subworkflows/gatk_genotyping.wdl" as gatk
import "../../subworkflows/freebayes_genotyping.wdl" as freebayes
+import "../../subworkflows/tassel_genotyping.wdl" as tassel
+import "../../subworkflows/stacks_genotyping.wdl" as stacks
workflow SNPCalling {
@@ -13,6 +15,7 @@ workflow SNPCalling {
File samples_info
ReferenceFasta references
Int max_cores
+ Int max_ram
Int chunk_size
Boolean rm_dupli = false
String? P1
@@ -21,7 +24,12 @@ workflow SNPCalling {
Boolean hardfilters = true
Boolean replaceAD = false
Boolean run_gatk = true
- Boolean run_freebayes = true
+ Boolean run_freebayes = false
+ Boolean run_tassel = true
+ Boolean run_stacks = true
+ Boolean pair_end = false
+ File? pop_map
+ String? enzyme
Int ploidy
Int n_chrom
}
@@ -33,7 +41,8 @@ workflow SNPCalling {
max_cores = max_cores,
rm_dupli = rm_dupli,
chunk_size = chunk_size,
- gatk_mchap = gatk_mchap
+ gatk_mchap = gatk_mchap,
+ pair_end = pair_end
}
if(run_gatk){
@@ -46,6 +55,7 @@ workflow SNPCalling {
ploidy = ploidy,
program="gatk",
max_cores = max_cores,
+ max_ram = max_ram,
merged_bams = CreateAlignmentFromFamilies.merged_bam,
P1 = P1,
P2 = P2,
@@ -68,15 +78,36 @@ workflow SNPCalling {
}
}
- Array[Array[File]] vcfs_sele = select_all([GatkGenotyping.vcfs, FreebayesGenotyping.vcfs])
- Array[Array[String]] software_sele = select_all([GatkGenotyping.vcfs_software, FreebayesGenotyping.vcfs_software])
- Array[Array[String]] source_sele = select_all([GatkGenotyping.vcfs_counts_source, FreebayesGenotyping.vcfs_counts_source])
+ if(run_tassel) {
+ call tassel.TasselGenotyping{
+ input:
+ families_info = samples_info,
+ references = references,
+ max_cores = max_cores,
+ max_ram = max_ram,
+ enzyme = enzyme
+ }
+ }
+
+ if(run_stacks) {
+ call stacks.StacksGenotyping {
+ input:
+ bams = CreateAlignmentFromFamilies.bam,
+ pop_map = pop_map,
+ max_cores = max_cores
+ }
+ }
+
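+    # keep only the callers that actually ran; select_all drops the undefined outputs of the conditional calls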
+ Array[Array[File]] vcfs_sele = select_all([GatkGenotyping.vcfs, FreebayesGenotyping.vcfs, TasselGenotyping.vcfs, StacksGenotyping.vcfs])
+ Array[Array[String]] software_sele = select_all([GatkGenotyping.vcfs_software, FreebayesGenotyping.vcfs_software, TasselGenotyping.software_sele, StacksGenotyping.software_sele])
+ Array[Array[String]] source_sele = select_all([GatkGenotyping.vcfs_counts_source, FreebayesGenotyping.vcfs_counts_source, TasselGenotyping.source_sele, StacksGenotyping.source_sele])
output {
Array[File] vcfs = flatten(vcfs_sele)
Array[String] vcfs_software = flatten(software_sele)
Array[String] vcfs_counts_source = flatten(source_sele)
File? gatk_multi_vcf = GatkGenotyping.vcf_multi
+ File? stacks_multiallelics = StacksGenotyping.stacks_multiallelics
File? gatk_vcfEval = GatkGenotyping.vcfEval
File? Plots = GatkGenotyping.Plots
File? freebayes_vcfEval = FreebayesGenotyping.vcfEval
diff --git a/pipelines/PreprocessingReads/PreprocessingReads.changelog.md b/pipelines/PreprocessingReads/PreprocessingReads.changelog.md
index cb359355..fc66cc7a 100644
--- a/pipelines/PreprocessingReads/PreprocessingReads.changelog.md
+++ b/pipelines/PreprocessingReads/PreprocessingReads.changelog.md
@@ -1,3 +1,7 @@
+# 1.0.1
+
+* runtimes adapted to run with Caper
+
# 1.0.0
Initial release
diff --git a/pipelines/PreprocessingReads/PreprocessingReads.inputs.json b/pipelines/PreprocessingReads/PreprocessingReads.inputs.json
new file mode 100644
index 00000000..67a40fd1
--- /dev/null
+++ b/pipelines/PreprocessingReads/PreprocessingReads.inputs.json
@@ -0,0 +1,10 @@
+{
+ "PreprocessingReads.spec": {
+ "barcodes": "File? (optional)",
+ "adapter": "String",
+ "raw_dict": "File",
+ "enzyme": "String",
+ "enzyme2": "String? (optional)"
+ }
+}
+
diff --git a/pipelines/SimulatedReads2Map/SimulatedReads2Map.changelog.md b/pipelines/SimulatedReads2Map/SimulatedReads2Map.changelog.md
index 4f9ef674..08373c0c 100644
--- a/pipelines/SimulatedReads2Map/SimulatedReads2Map.changelog.md
+++ b/pipelines/SimulatedReads2Map/SimulatedReads2Map.changelog.md
@@ -1,3 +1,7 @@
+# 1.0.1
+
+* runtimes adapted to run with Caper
+
# 1.0.0
Initial release
diff --git a/pipelines/SimulatedReads2Map/SimulatedReads2Map.inputs.json b/pipelines/SimulatedReads2Map/SimulatedReads2Map.inputs.json
new file mode 100644
index 00000000..ac689dc8
--- /dev/null
+++ b/pipelines/SimulatedReads2Map/SimulatedReads2Map.inputs.json
@@ -0,0 +1,49 @@
+{
+ "SimulatedReads.filters": "String? (optional)",
+ "SimulatedReads.hardfilters": "Boolean (optional, default = true)",
+ "SimulatedReads.family": {
+ "cmBymb": "Float? (optional)",
+ "seed": "Int",
+ "doses": "File? (optional)",
+ "cross": "String",
+ "popsize": "Int",
+ "ploidy": "Int"
+ },
+ "SimulatedReads.references": {
+ "ref_fasta": "File",
+ "ref_dict": "File",
+ "ref_ann": "File",
+ "ref_sa": "File",
+ "ref_amb": "File",
+ "ref_pac": "File",
+ "ref_bwt": "File",
+ "ref_fasta_index": "File"
+ },
+ "SimulatedReads.n_chrom": "Int",
+ "SimulatedReads.chunk_size": "Int (optional, default = 5)",
+ "SimulatedReads.max_cores": "Int",
+ "SimulatedReads.number_of_families": "Int",
+ "SimulatedReads.gatk_mchap": "Boolean (optional, default = false)",
+ "SimulatedReads.global_seed": "Int",
+ "SimulatedReads.sequencing": {
+ "emp_vcf": "File",
+ "enzyme1": "String",
+ "pcr_cycles": "Int? (optional)",
+ "vcf_parent1": "String",
+ "library_type": "String",
+ "depth_parents": "Int",
+ "mapsize": "Int",
+ "rm_dupli": "String",
+ "insert_size": "Int? (optional)",
+ "depth": "Int",
+ "insert_size_dev": "Int? (optional)",
+ "ref_map": "File? (optional)",
+ "enzyme2": "String? (optional)",
+ "multiallelics": "String",
+ "vcf_parent2": "String",
+ "read_length": "Int? (optional)",
+ "emp_bam": "File? (optional)",
+ "chromosome": "String"
+ }
+}
+
diff --git a/subworkflows/SimulatedSingleFamily.wdl b/subworkflows/SimulatedSingleFamily.wdl
index 54d716ad..80718f4e 100644
--- a/subworkflows/SimulatedSingleFamily.wdl
+++ b/subworkflows/SimulatedSingleFamily.wdl
@@ -86,8 +86,9 @@ workflow SimulatedSingleFamily {
call utils.ApplyRandomFiltersArray {
input:
vcfs = flatten(vcfs_sele),
- vcfs_software = flatten(software_sele),
- vcfs_counts_source = flatten(source_sele),
+ vcfs_SNPCall_software = flatten(software_sele),
+ vcfs_Counts_source = flatten(source_sele),
+ vcfs_GenoCall_software = range(length(flatten(software_sele))),
filters = filters,
chromosome = sequencing.chromosome
}
diff --git a/subworkflows/create_alignment_from_families_files.wdl b/subworkflows/create_alignment_from_families_files.wdl
index b284de7f..79b03230 100644
--- a/subworkflows/create_alignment_from_families_files.wdl
+++ b/subworkflows/create_alignment_from_families_files.wdl
@@ -11,6 +11,7 @@ workflow CreateAlignmentFromFamilies {
Int max_cores
Boolean rm_dupli
Boolean gatk_mchap
+ Boolean pair_end
Int chunk_size
}
@@ -24,11 +25,17 @@ workflow CreateAlignmentFromFamilies {
Array[Array[String]] sample_file = read_tsv(chunk)
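+    # when pair_end is true, the fourth line of each chunk TSV carries the second (R2) FASTQ files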
+ if(pair_end) {
+ Array[File] pair = sample_file[3]
+ }
+
call alg.RunBwaAlignment {
input:
sampleName = sample_file[1],
- reads = sample_file[0],
+ reads1 = sample_file[0],
+ reads2 = pair,
libraries = sample_file[2],
+ pair_end = pair_end,
references = references,
max_cores = max_cores,
rm_dupli = rm_dupli
diff --git a/subworkflows/gatk_genotyping.wdl b/subworkflows/gatk_genotyping.wdl
index 1a4375d0..4421d3d1 100644
--- a/subworkflows/gatk_genotyping.wdl
+++ b/subworkflows/gatk_genotyping.wdl
@@ -29,6 +29,7 @@ workflow GatkGenotyping {
File? merged_bams
String? P1
String? P2
+ Int? max_ram
}
call chunk_lists.CreateChunksBam {
@@ -49,7 +50,8 @@ workflow GatkGenotyping {
reference_fai = references.ref_fasta_index,
reference_dict = references.ref_dict,
ploidy = ploidy,
- chunk_size = chunk_size
+ chunk_size = chunk_size,
+ max_ram = max_ram
}
}
@@ -63,7 +65,8 @@ workflow GatkGenotyping {
reference_fasta=references.ref_fasta,
reference_fai=references.ref_fasta_index,
reference_dict=references.ref_dict,
- interval = interval
+ interval = interval,
+ max_ram = max_ram
}
call gatk.GenotypeGVCFs {
@@ -72,7 +75,8 @@ workflow GatkGenotyping {
interval = interval,
reference_fasta = references.ref_fasta,
reference_fai = references.ref_fasta_index,
- reference_dict = references.ref_dict
+ reference_dict = references.ref_dict,
+ max_ram = max_ram
}
}
diff --git a/subworkflows/genotyping_empirical.wdl b/subworkflows/genotyping_empirical.wdl
index 646b56ac..f08ddf62 100644
--- a/subworkflows/genotyping_empirical.wdl
+++ b/subworkflows/genotyping_empirical.wdl
@@ -23,6 +23,8 @@ workflow onemapMapsEmp {
call utilsR.ReGenotyping {
input:
vcf_file = vcf_file,
+ SNPCall_program = SNPCall_program,
+ CountsFrom = CountsFrom,
GenotypeCall_program = GenotypeCall_program,
cross = cross,
parent1 = parent1,
@@ -31,15 +33,39 @@ workflow onemapMapsEmp {
ploidy = ploidy
}
+ call utils.ApplyRandomFiltersArray as FilterBi{
+ input:
+ vcfs = [ReGenotyping.regeno_vcf],
+ vcfs_SNPCall_software = [SNPCall_program],
+ vcfs_Counts_source = [CountsFrom],
+ vcfs_GenoCall_software = [GenotypeCall_program + "_biallelic"],
+ chromosome = chromosome
+ }
+
if (multiallelics) {
+
+ Array[File] array_vcf = select_all([multiallelics_file])
+
+ call utils.ApplyRandomFiltersArray as FilterMulti{
+ input:
+ vcfs = array_vcf,
+ vcfs_SNPCall_software = [SNPCall_program],
+ vcfs_Counts_source = [CountsFrom],
+ vcfs_GenoCall_software = [GenotypeCall_program + "_multiallelic"],
+ chromosome = chromosome
+ }
+
call utils.JointMarkers {
input:
- biallelic_vcf = ReGenotyping.regeno_vcf,
- multiallelic_vcf = multiallelics_file
+ biallelic_vcf = FilterBi.vcfs_filt[0],
+ multiallelic_vcf = FilterMulti.vcfs_filt[0],
+ SNPCall_program = SNPCall_program,
+ CountsFrom = CountsFrom,
+ GenotypeCall_program = GenotypeCall_program
}
}
- File updated_vcf = select_first([JointMarkers.merged_vcf, ReGenotyping.regeno_vcf])
+ File updated_vcf = select_first([JointMarkers.merged_vcf, FilterBi.vcfs_filt[0]])
call utilsR.SetProbs {
input:
diff --git a/subworkflows/genotyping_simulated.wdl b/subworkflows/genotyping_simulated.wdl
index b3f1f3bf..d67d5041 100644
--- a/subworkflows/genotyping_simulated.wdl
+++ b/subworkflows/genotyping_simulated.wdl
@@ -28,6 +28,8 @@ workflow onemapMaps {
input:
vcf_file = vcf_file,
GenotypeCall_program = genotyping_program,
+ SNPCall_program = SNPCall_program,
+ CountsFrom = CountsFrom,
cross = cross,
parent1 = "P1",
parent2 = "P2",
@@ -39,7 +41,10 @@ workflow onemapMaps {
call utils.JointMarkers {
input:
biallelic_vcf = ReGenotyping.regeno_vcf,
- multiallelic_vcf = multiallelics_file
+ multiallelic_vcf = multiallelics_file,
+ SNPCall_program = SNPCall_program,
+ CountsFrom = CountsFrom,
+ GenotypeCall_program = genotyping_program
}
}
diff --git a/subworkflows/mappoly_maps_empirical.wdl b/subworkflows/mappoly_maps_empirical.wdl
index 5b02c5a9..91dfefa2 100644
--- a/subworkflows/mappoly_maps_empirical.wdl
+++ b/subworkflows/mappoly_maps_empirical.wdl
@@ -23,6 +23,8 @@ workflow MappolyMapsEmp {
input:
vcf_file = vcf_file,
GenotypeCall_program = GenotypeCall_program,
+ SNPCall_program = SNPCall_program,
+ CountsFrom = CountsFrom,
cross = cross,
parent1 = parent1,
parent2 = parent2,
diff --git a/subworkflows/snpcaller_maps_empirical.wdl b/subworkflows/snpcaller_maps_empirical.wdl
index 0e24aa25..ddc5f680 100644
--- a/subworkflows/snpcaller_maps_empirical.wdl
+++ b/subworkflows/snpcaller_maps_empirical.wdl
@@ -25,7 +25,10 @@ workflow SNPCallerMapsEmp {
call utils.JointMarkers {
input:
biallelic_vcf = vcf_file,
- multiallelic_vcf = multiallelics_file
+ multiallelic_vcf = multiallelics_file,
+ SNPCall_program = SNPCall_program,
+ CountsFrom = CountsFrom,
+ GenotypeCall_program = GenotypeCall_program
}
}
diff --git a/subworkflows/snpcaller_maps_simulated.wdl b/subworkflows/snpcaller_maps_simulated.wdl
index 96563505..c0138bb0 100644
--- a/subworkflows/snpcaller_maps_simulated.wdl
+++ b/subworkflows/snpcaller_maps_simulated.wdl
@@ -27,7 +27,10 @@ workflow SNPCallerMaps {
call utils.JointMarkers {
input:
biallelic_vcf = vcf_file,
- multiallelic_vcf = multiallelics_file
+ multiallelic_vcf = multiallelics_file,
+ SNPCall_program = SNPCall_program,
+ CountsFrom = CountsFrom,
+ GenotypeCall_program = GenotypeCall_program
}
}
diff --git a/subworkflows/stacks_genotyping.wdl b/subworkflows/stacks_genotyping.wdl
new file mode 100644
index 00000000..baaf7c15
--- /dev/null
+++ b/subworkflows/stacks_genotyping.wdl
@@ -0,0 +1,35 @@
+version 1.0
+
+import "../tasks/stacks.wdl"
+
+
+workflow StacksGenotyping {
+ input {
+ Array[File] bams
+ File? pop_map
+ Int max_cores
+ }
+
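+  # when no population map is provided, build one from the BAM files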
+ if(!defined(pop_map)){
+ call stacks.CreatePopMapFile {
+ input:
+ bams = bams
+ }
+ }
+
+ File pop_map_sele = select_first([pop_map, CreatePopMapFile.pop_map])
+
+ call stacks.RefMap {
+ input:
+ bams = bams,
+ pop_map = pop_map_sele,
+ max_cores = max_cores
+ }
+
+ output {
+ Array[File] vcfs = [RefMap.stacks_vcf]
+ File stacks_multiallelics = RefMap.stacks_multiallelics
+ Array[String] software_sele = ["stacks"]
+ Array[String] source_sele = ["vcf"]
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/tassel_genotyping.wdl b/subworkflows/tassel_genotyping.wdl
new file mode 100644
index 00000000..1e0d5ff1
--- /dev/null
+++ b/subworkflows/tassel_genotyping.wdl
@@ -0,0 +1,65 @@
+version 1.0
+
+import "../structs/dna_seq_structs.wdl"
+import "../tasks/tassel.wdl"
+import "../tasks/BWA.wdl"
+import "../tasks/utils.wdl" as utils
+
+
+workflow TasselGenotyping {
+ input {
+ File families_info
+ ReferenceFasta references
+ Int max_cores
+ Int max_ram
+ String enzyme = "ApeKI"
+ }
+
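+  # TASSEL-GBS expects barcoded FASTQs and a key file: BarcodeFaker adds fake barcodes to the
+  # demultiplexed reads, TasselBeforeAlign builds the tag database and the tag FASTQ that is
+  # aligned with BWA, and TasselAfterAlign calls the variants from the aligned tags.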
+ call tassel.transposeSamples {
+ input:
+ families_info = families_info
+ }
+
+ Array[Array[String]] sample_file = read_tsv(transposeSamples.transpose_samples)
+
+ call tassel.BarcodeFaker {
+ input:
+ fastq = sample_file[0],
+ FullSampleName = sample_file[2]
+ }
+
+ call tassel.TasselBeforeAlign {
+ input:
+ fastq = BarcodeFaker.barcode_fastq,
+ enzyme = enzyme,
+ key_file = BarcodeFaker.key_file,
+ max_ram = max_ram
+ }
+
+ call BWA.RunBwaAlignment {
+ input:
+ sampleName = ["tasselSample"],
+ reads1 = [TasselBeforeAlign.fastq_align],
+ pair_end = false,
+ libraries = ["tasselSample"],
+ references = references,
+ max_cores = max_cores,
+ rm_dupli = false
+ }
+
+ call tassel.TasselAfterAlign {
+ input:
+ tassel_database = TasselBeforeAlign.tassel_database,
+ bam = RunBwaAlignment.bam[0],
+ max_ram = max_ram,
+ enzyme = enzyme,
+ key_file = BarcodeFaker.key_file,
+ fastq = BarcodeFaker.barcode_fastq
+ }
+
+ output {
+ Array[File] vcfs = [TasselAfterAlign.tassel_vcf]
+ Array[String] software_sele = ["tassel"]
+ Array[String] source_sele = ["vcf"]
+ }
+}
\ No newline at end of file
diff --git a/tasks/BWA.wdl b/tasks/BWA.wdl
index 60749b68..86129fff 100644
--- a/tasks/BWA.wdl
+++ b/tasks/BWA.wdl
@@ -9,27 +9,38 @@ task RunBwaAlignment {
input {
Array[String] sampleName
- Array[File] reads
+ Array[File] reads1
+ Array[File]? reads2
Array[String] libraries
ReferenceFasta references
+ Boolean pair_end
Int max_cores
String rm_dupli
}
- Int disk_size = ceil(size(reads, "GiB") * 2 + size(references.ref_fasta, "GiB") + 20)
+ Int disk_size = ceil(size(reads1, "GiB") * 2 + size(references.ref_fasta, "GiB") + 20)
Int memory_size = 4000 * max_cores
command <<<
mkdir tmp
- reads_list=( ~{sep=" " reads} )
+ reads1_list=( ~{sep=" " reads1} )
+ reads2_list=( ~{sep=" " reads2} )
lib_list=( ~{sep=" " libraries} )
sampleName_list=( ~{sep=" " sampleName})
BAMS=()
- for index in ${!reads_list[*]}; do
- echo "${reads_list[$index]} is in ${lib_list[$index]}"
+ for index in ${!reads1_list[*]}; do
+ echo "${reads1_list[$index]} is in ${lib_list[$index]}"
+
+          # choose single- or paired-end FASTQ arguments for this sample
+          if [ "~{pair_end}" = "true" ]
+          then
+            reads=( "${reads1_list[$index]}" "${reads2_list[$index]}" )
+          else
+            reads=( "${reads1_list[$index]}" )
+          fi
+
bwa_header="@RG\tID:${sampleName_list[$index]}.${lib_list[$index]}\tLB:lib-${lib_list[$index]}\tPL:illumina\tSM:${sampleName_list[$index]}\tPU:FLOWCELL1.LANE1.${lib_list[$index]}"
- /usr/gitc/./bwa mem -t ~{max_cores} -R "${bwa_header}" ~{references.ref_fasta} "${reads_list[$index]}" | \
+          /usr/gitc/./bwa mem -t ~{max_cores} -R "${bwa_header}" ~{references.ref_fasta} "${reads[@]}" | \
java -jar /usr/gitc/picard.jar SortSam \
I=/dev/stdin \
O="${sampleName_list[$index]}.${lib_list[$index]}.sorted.bam" \
@@ -92,6 +103,7 @@ task RunBwaAlignment {
runtime {
docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
+ singularity:"docker://us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
cpu: max_cores
# Cloud
memory:"~{memory_size} MiB"
@@ -100,7 +112,7 @@ task RunBwaAlignment {
# Slurm
job_name: "RunBwaAlignment"
mem:"~{memory_size}M"
- time:"05:00:00"
+ time: 5
}
meta {
@@ -185,6 +197,7 @@ task RunBwaAlignmentSimu {
runtime {
docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
+ singularity:"docker://us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
cpu: max_cores
# Cloud
memory:"~{memory_size} MiB"
@@ -193,7 +206,7 @@ task RunBwaAlignmentSimu {
# Slurm
job_name: "RunBwaAlignmentSimu"
mem:"~{memory_size}M"
- time:"10:00:00"
+ time: 10
}
meta {
@@ -227,6 +240,7 @@ task CreateChunksFastq {
runtime {
docker: "ubuntu:20.04"
+ singularity:"docker://ubuntu:20.04"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -234,7 +248,7 @@ task CreateChunksFastq {
# Slurm
job_name: "CreateChunksFastq"
mem:"~{memory_size}M"
- time:"00:05:00"
+ time: 1
}
meta {
diff --git a/tasks/JointReports.wdl b/tasks/JointReports.wdl
index 2d7a69aa..ae76562c 100644
--- a/tasks/JointReports.wdl
+++ b/tasks/JointReports.wdl
@@ -1,16 +1,171 @@
version 1.0
+
+task JointAllReports{
+ input{
+ Array[File]? SNPCallerMapsEmp
+ Array[File]? updogMaps
+ Array[File]? polyradMaps
+ Array[File]? supermassaMaps
+ Array[File]? gusmapMapsEmp
+ Array[File]? SNPCallerPolyMapsEmp
+ Array[File]? updogPolyMaps
+ Array[File]? polyradPolyMaps
+ Array[File]? supermassaPolyMaps
+ Int max_cores
+ Int ploidy
+ }
+
+ Int memory_size = 4000
+
+ command <<<
+        R --vanilla --no-save <<RSCRIPT
+
+      # packages required by the code below: str_split (stringr), vroom/vroom_write (vroom), saveList (largeList)
+      library(stringr)
+      library(vroom)
+      library(largeList)
+
+      if(~{ploidy} > 2){
+
+ SNPCallerPolyMapsEmp <- str_split("~{sep=';' SNPCallerPolyMapsEmp}", ";", simplify = TRUE)
+ updogPolyMaps <- str_split("~{sep=';' updogPolyMaps}", ";", simplify = TRUE)
+ polyradPolyMaps <- str_split("~{sep=';' polyradPolyMaps}", ";", simplify = TRUE)
+ supermassaPolyMaps <- str_split("~{sep=';' supermassaPolyMaps}", ";", simplify = TRUE)
+
+ all_files <- c(SNPCallerPolyMapsEmp, updogPolyMaps, polyradPolyMaps, supermassaPolyMaps)
+          all_files <- all_files[all_files != ""] # drop callers that produced no report
+
+ system("mkdir results_all")
+
+ for(i in 1:length(all_files)) {
+ system(paste("tar -xvf ", all_files[i]))
+ system("mv results/* results_all")
+ system("rm -r results")
+ }
+
+ system("tar -czvf EmpiricalReads_results.tar.gz results_all")
+
+ } else {
+
+ SNPCallerMapsEmp <- str_split("~{sep=';' SNPCallerMapsEmp}", ";", simplify = TRUE)
+ updogMaps <- str_split("~{sep=';' updogMaps}", ";", simplify = TRUE)
+ polyradMaps <- str_split("~{sep=';' polyradMaps}", ";", simplify = TRUE)
+ supermassaMaps <- str_split("~{sep=';' supermassaMaps}", ";", simplify = TRUE)
+ gusmapMapsEmp <- str_split("~{sep=';' gusmapMapsEmp}", ";", simplify = TRUE)
+
+ files <- list(SNPCallerMapsEmp, updogMaps, polyradMaps, supermassaMaps, gusmapMapsEmp)
+      files <- files[sapply(files, function(x) any(x != ""))] # keep only the callers that produced reports
+
+ path_dir <- tempdir()
+ system(paste0("mkdir ", paste0(path_dir, c("/maps", "/filters", "/errors", "/times", "/RDatas"), collapse = " ")))
+ for(i in 1:length(files)){
+ for(j in 1:length(files[[i]])){
+ untar(files[[i]][[j]], exdir = path_dir)
+ list_files <- untar(files[[i]][[j]], exdir = path_dir, list = T)
+ system(paste0("mv ",path_dir, "/",list_files[1], "*_map_report.tsv.gz ", path_dir, "/maps"))
+ system(paste0("mv ",path_dir, "/",list_files[1], "*_times_report.tsv.gz ", path_dir, "/times"))
+ system(paste0("mv ",path_dir, "/",list_files[1], "*.RData ", path_dir, "/RDatas"))
+ if(!grepl("gusmap", list_files[1])){
+ system(paste0("mv ",path_dir, "/",list_files[1], "*_filters_report.tsv.gz ", path_dir, "/filters"))
+ system(paste0("mv ",path_dir, "/",list_files[1], "*_errors_report.tsv.gz ", path_dir, "/errors"))
+ }
+ }
+ }
+
+ files <- system(paste0("ls ", path_dir, "/maps/"), intern = T)
+ files <- paste0(path_dir, "/maps/", files)
+ maps <- vroom(files, num_threads = ~{max_cores})
+
+ files <- system(paste0("ls ", path_dir, "/filters/"), intern = T)
+ files <- paste0(path_dir, "/filters/", files)
+ filters <- vroom(files, num_threads = ~{max_cores})
+
+ files <- system(paste0("ls ", path_dir, "/errors/"), intern = T)
+ files <- paste0(path_dir, "/errors/", files)
+ errors <- vroom(files, num_threads = ~{max_cores})
+
+ files <- system(paste0("ls ", path_dir, "/times/"), intern = T)
+ files <- paste0(path_dir, "/times/", files)
+ times <- vroom(files, num_threads = ~{max_cores})
+
+ files <- system(paste0("ls ", path_dir, "/RDatas/"), intern = T)
+ files <- paste0(path_dir, "/RDatas/", files)
+
+ all_RDatas <- list()
+ for(i in 1:length(files)){
+ map_temp <- load(files[i])
+ all_RDatas[[i]] <- get(map_temp)
+ }
+
+ names(all_RDatas) <- basename(files)
+
+ if(any(grepl("gusmap", names(all_RDatas)))){
+ gusmap_RDatas <- all_RDatas[grep("gusmap", names(all_RDatas))]
+ save(gusmap_RDatas, file = "gusmap_RDatas.RData")
+ RDatas <- all_RDatas[-grep("gusmap", names(all_RDatas))]
+ } else RDatas <- all_RDatas
+
+      # Convert onemap sequencing objects to plain lists; largeList does not accept other classes.
+      # GUSMap objects are kept separate because they are stored as environments, not classes.
+ for(i in 1:length(RDatas)){
+ class(RDatas[[i]]) <- "list"
+ }
+
+ saveList(RDatas, file = "sequences_emp.llo", append=FALSE, compress=TRUE)
+
+ new_names <- names(all_RDatas)
+ vroom_write(as.data.frame(new_names), "names.tsv.gz")
+
+ # Outputs
+ vroom_write(errors, "data1_depths_geno_prob.tsv.gz", num_threads = ~{max_cores})
+ vroom_write(maps, "data2_maps.tsv.gz", num_threads = ~{max_cores})
+ vroom_write(filters, "data3_filters.tsv.gz", num_threads = ~{max_cores})
+ vroom_write(times, "data4_times.tsv.gz", num_threads = ~{max_cores})
+
+ system("mkdir EmpiricalReads_results")
+
+ if(any(grepl("gusmap", names(all_RDatas)))){
+ system("mv gusmap_RDatas.RData sequences_emp.llo data1_depths_geno_prob.tsv.gz data2_maps.tsv.gz data3_filters.tsv.gz data4_times.tsv.gz names.tsv.gz EmpiricalReads_results")
+ } else system("mv sequences_emp.llo data1_depths_geno_prob.tsv.gz data2_maps.tsv.gz data3_filters.tsv.gz data4_times.tsv.gz names.tsv.gz EmpiricalReads_results")
+
+ system("tar -czvf EmpiricalReads_results.tar.gz EmpiricalReads_results")
+
+ }
+
+ RSCRIPT
+ >>>
+
+ runtime{
+ docker:"cristaniguti/reads2map:0.0.4"
+ singularity: "docker://cristaniguti/reads2map:0.0.4"
+ cpu: max_cores
+ # Cloud
+ memory:"~{memory_size} MiB"
+ disks:"local-disk 2 GiB HDD"
+ # Slurm
+ job_name: "JointAllReports"
+ mem:"~{memory_size}M"
+ time: 2
+ }
+
+ output {
+ File EmpiricalReads_results = "EmpiricalReads_results.tar.gz"
+ }
+}
+
+
task JointReports{
input{
- Array[File?] SNPCaller
- Array[File?] updog
- Array[File?] polyrad
- Array[File?] supermassa
- Array[File?] gusmap
+ Array[File]? SNPCaller
+ Array[File]? updog
+ Array[File]? polyrad
+ Array[File]? supermassa
+ Array[File]? gusmap
Int max_cores
}
- Int disk_size = ceil(size(SNPCaller, "GiB") * 1.5 + size(updog, "GiB") * 1.5 + size(polyrad, "GiB") * 1.5 + size(supermassa, "GiB") * 1.5 + size(gusmap, "GiB") * 1.5)
+ Int disk_size = 4000
Int memory_size = 4000
command <<<
@@ -104,6 +259,7 @@ task JointReports{
runtime{
docker:"cristaniguti/reads2map:0.0.4"
+ singularity: "docker://cristaniguti/reads2map:0.0.4"
cpu: max_cores
# Cloud
memory:"~{memory_size} MiB"
@@ -111,7 +267,7 @@ task JointReports{
# Slurm
job_name: "JointReports"
mem:"~{memory_size}M"
- time:"01:40:00"
+ time: 2
}
output{
@@ -247,6 +403,7 @@ task JointReportsSimu {
runtime {
docker:"cristaniguti/reads2map:0.0.4"
+ singularity: "docker://cristaniguti/reads2map:0.0.4"
cpu: max_cores
# Cloud
memory:"~{memory_size} MiB"
@@ -254,7 +411,7 @@ task JointReportsSimu {
# Slurm
job_name: "JointReports"
mem:"~{memory_size}M"
- time:"01:40:00"
+ time: 2
}
meta {
@@ -370,6 +527,7 @@ task JointTablesSimu{
runtime {
docker:"cristaniguti/reads2map:0.0.4"
+ singularity: "docker://cristaniguti/reads2map:0.0.4"
cpu: 1
# Cloud
memory:"~{memory_size} MiB"
@@ -377,7 +535,7 @@ task JointTablesSimu{
# Slurm
job_name: "JointTables"
mem:"~{memory_size}M"
- time:"01:40:00"
+ time: 2
}
output {
@@ -435,6 +593,7 @@ task JointReportsPoly{
runtime{
docker:"ubuntu:20.04"
+ singularity: "docker://ubuntu:20.04"
cpu: 1
# Cloud
memory:"~{memory_size} MiB"
@@ -442,7 +601,7 @@ task JointReportsPoly{
# Slurm
job_name: "JointReports"
mem:"~{memory_size}M"
- time:"01:40:00"
+ time: 2
}
output{
diff --git a/tasks/bcftools.wdl b/tasks/bcftools.wdl
index 160fe460..b0f22141 100644
--- a/tasks/bcftools.wdl
+++ b/tasks/bcftools.wdl
@@ -13,7 +13,7 @@ task BiallelicNormalization {
}
Int disk_size = ceil(size(vcf_file, "GiB") + size(reference, "GiB") + 2)
- Int memory_size = 7000
+ Int memory_size = 4000 + ceil(size(vcf_file, "MiB") + size(reference, "MiB") + 2)
command <<<
@@ -30,6 +30,7 @@ task BiallelicNormalization {
runtime {
docker: "lifebitai/bcftools:1.10.2"
+ singularity:"docker://lifebitai/bcftools:1.10.2"
cpu: 1
# Cloud
memory:"~{memory_size} MiB"
@@ -37,7 +38,7 @@ task BiallelicNormalization {
# Slurm
job_name: "BiallelicNormalization"
mem:"~{memory_size}M"
- time:"01:00:00"
+ time: 1
}
meta {
diff --git a/tasks/chunk_lists.wdl b/tasks/chunk_lists.wdl
index 689126d7..0ad7c3d4 100644
--- a/tasks/chunk_lists.wdl
+++ b/tasks/chunk_lists.wdl
@@ -7,11 +7,14 @@ task SepareChunksFastqString {
}
Int disk_size = ceil(size(families_info, "GiB") * 2)
- Int memory_size = 1000
+ Int memory_size = 4000 + ceil(size(families_info, "MiB") * 2)
command <<<
R --vanilla --no-save < 3) { #pair-end
+ df <- df[,c(1,3,4,2)]
+ }
split_df <- split.data.frame(df, df[,2])
n_chunk <- as.integer(length(split_df)/~{chunk_size})
@@ -22,6 +25,7 @@ task SepareChunksFastqString {
for(i in 1:length(chunk_sep)){
df <- do.call(rbind, unlist(chunk_sep[i], recursive = F))
df <- t(df)
+ print(df)
write.table(df, file = paste0("chunk_",i, ".txt"), quote = F, col.names = F, row.names = F, sep="\t")
}
@@ -31,6 +35,7 @@ task SepareChunksFastqString {
runtime {
docker: "cristaniguti/reads2map:0.0.4"
+ singularity:"docker://cristaniguti/reads2map:0.0.4"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -38,7 +43,7 @@ task SepareChunksFastqString {
# Slurm
job_name: "SepareChunksIndividuals"
mem:"~{memory_size}M"
- time:"00:10:00"
+ time: 1
}
meta {
@@ -60,7 +65,7 @@ task SepareChunksFastq {
}
Int disk_size = ceil(size(fastqs, "GiB") * 2)
- Int memory_size = 1000
+ Int memory_size = 1000 + ceil(size(fastqs, "MiB") * 2)
command <<<
R --vanilla --no-save <>>
runtime {
docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
+ singularity:"docker://us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
cpu: max_cores
# Cloud
memory:"~{memory_size} MiB"
@@ -52,7 +54,7 @@ task HaplotypeCaller {
# Slurm
job_name: "HaplotypeCaller"
mem:"~{memory_size}M"
- time:"24:00:00"
+ time: 24
}
meta {
@@ -75,12 +77,13 @@ task ImportGVCFs {
File reference_fai
File reference_dict
String interval
+ Int max_ram = 26000
}
Int disk_size = ceil(size(vcfs, "GiB") * 1.5 + size(reference_fasta, "GiB") * 1.5)
- Int memory_max = 2300
- Int memory_min = 2000
- Int memory_size = 26000
+ Int memory_max = ceil(max_ram - 1000)
+ Int memory_min = ceil(max_ram/3.85)
+ Int memory_size = max_ram
command <<<
set -euo pipefail
@@ -103,6 +106,7 @@ task ImportGVCFs {
runtime {
docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
+ singularity:"docker://us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
cpu: 4
# Cloud
memory:"~{memory_size} MiB"
@@ -110,7 +114,7 @@ task ImportGVCFs {
# Slurm
job_name: "ImportGVCFs"
mem:"~{memory_size}M"
- time:"24:00:00"
+ time: 24
}
meta {
@@ -131,12 +135,13 @@ task GenotypeGVCFs {
File reference_fai
File reference_dict
String interval
+ Int max_ram = 26000
}
Int disk_size = ceil(size(reference_fasta, "GiB") * 1.5 + size(workspace_tar, "GiB") * 1.5)
- Int memory_max = 2300
- Int memory_min = 2000
- Int memory_size = 26000
+ Int memory_max = ceil(max_ram - 1000)
+ Int memory_min = ceil(max_ram/3.85)
+ Int memory_size = max_ram
command <<<
set -euo pipefail
@@ -155,6 +160,7 @@ task GenotypeGVCFs {
runtime {
docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
+ singularity:"docker://us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
cpu: 2
# Cloud
memory:"~{memory_size} MiB"
@@ -162,7 +168,7 @@ task GenotypeGVCFs {
# Slurm
job_name: "GenotypeGVCFs"
mem:"~{memory_size}M"
- time:"24:00:00"
+ time: 24
}
meta {
@@ -185,8 +191,8 @@ task MergeVCFs {
}
Int disk_size = ceil(size(input_vcfs, "GiB") * 2.5) + 10
- Int memory_max = 2800
- Int memory_min = 2600
+ Int memory_max = 2600
+ Int memory_min = 2500
Int memory_size = 3000
command <<<
@@ -199,6 +205,7 @@ task MergeVCFs {
runtime {
docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
+ singularity:"docker://us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
cpu: 1
# Cloud
memory:"~{memory_size} MiB"
@@ -206,7 +213,7 @@ task MergeVCFs {
# Slurm
job_name: "MergeVCFs"
mem:"~{memory_size}M"
- time:"10:00:00"
+ time: 10
}
meta {
@@ -231,7 +238,7 @@ task VariantsToTable {
}
Int disk_size = ceil(size(reference, "GB") + size(vcf_file, "GB") + 2)
- Int memory_size = 2500
+ Int memory_size = 2500 + ceil(size(reference, "MB") + size(vcf_file, "MB") + 2)
command <<<
@@ -250,6 +257,7 @@ task VariantsToTable {
runtime {
docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
+ singularity:"docker://us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
cpu: 1
# Cloud
memory:"~{memory_size} MiB"
@@ -257,7 +265,7 @@ task VariantsToTable {
# Slurm
job_name: "VariantsToTable"
mem:"~{memory_size}M"
- time:"01:40:00"
+ time: 2
}
meta {
@@ -282,7 +290,7 @@ task VariantFiltration {
}
Int disk_size = ceil(size(vcf_file, "GB") + size(reference, "GB") + 1)
- Int memory_size = 3000
+ Int memory_size = 3000 + ceil(size(vcf_file, "MB") + size(reference, "MB") + 1)
command <<<
/usr/gitc/gatk4/./gatk VariantFiltration \
@@ -306,6 +314,7 @@ task VariantFiltration {
runtime {
docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
+ singularity:"docker://us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
cpu: 1
# Cloud
memory:"~{memory_size} MiB"
@@ -313,7 +322,7 @@ task VariantFiltration {
# Slurm
job_name: "VariantFiltration"
mem:"~{memory_size}M"
- time:"01:00:00"
+ time: 1
}
meta {
@@ -339,7 +348,7 @@ task VariantsToTableForHardFilteringSimulated {
}
Int disk_size = ceil(size(reference, "GB") + size(vcf_file, "GB") + size(simu_vcf, "GB") + 2)
- Int memory_size = 3000
+ Int memory_size = 3000 + ceil(size(reference, "MB") + size(vcf_file, "MB") + size(simu_vcf, "MB") + 2)
command <<<
/usr/gitc/./bgzip -c ~{simu_vcf} > ~{simu_vcf}.gz
@@ -393,6 +402,7 @@ task VariantsToTableForHardFilteringSimulated {
runtime {
docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
+ singularity:"docker://us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
cpu: 1
# Cloud
memory:"~{memory_size} MiB"
@@ -400,7 +410,7 @@ task VariantsToTableForHardFilteringSimulated {
# Slurm
job_name: "VariantsToTable"
mem:"~{memory_size}M"
- time:"01:40:00"
+ time: 2
}
meta {
@@ -428,7 +438,7 @@ task VariantEval {
}
Int disk_size = ceil(size(vcf_norm, "GiB") + size(reference, "GiB") + size(vcf_simu, "GiB") + 2)
- Int memory_size = 3000
+ Int memory_size = 3000 + ceil(size(vcf_norm, "MiB") + size(reference, "MiB") + size(vcf_simu, "MiB") + 2)
command <<<
java -jar /usr/gitc/GATK35.jar -T VariantEval -R ~{reference} -eval ~{vcf_norm} ~{"-D " + vcf_simu} -EV ValidationReport -EV CountVariants -ploidy ~{ploidy} -o vcfEval.txt
@@ -436,6 +446,7 @@ task VariantEval {
runtime {
docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
+ singularity: "docker://us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
cpu: 1
# Cloud
memory:"~{memory_size} MiB"
@@ -443,7 +454,7 @@ task VariantEval {
# Slurm
job_name: "VariantEval"
mem:"~{memory_size}M"
- time:"01:00:00"
+ time: 1
}
meta {
diff --git a/tasks/gusmap.wdl b/tasks/gusmap.wdl
index 471e3b4e..67290d9a 100644
--- a/tasks/gusmap.wdl
+++ b/tasks/gusmap.wdl
@@ -47,6 +47,7 @@ task GusmapReport {
runtime {
docker:"cristaniguti/reads2map:0.0.4"
+ singularity: "docker://cristaniguti/reads2map:0.0.4"
cpu: max_cores
# Cloud
memory:"~{memory_size} MiB"
@@ -54,7 +55,7 @@ task GusmapReport {
# Slurm
job_name: "GusmapReport"
mem:"~{memory_size}G"
- time:"24:00:00"
+ time: 24
}
meta {
@@ -154,6 +155,7 @@ task GusmapReportForSimulated {
runtime {
docker: "cristaniguti/reads2map:0.0.4"
+ singularity: "docker://cristaniguti/reads2map:0.0.4"
cpu: max_cores
# Cloud
memory:"~{memory_size} MiB"
@@ -161,7 +163,7 @@ task GusmapReportForSimulated {
# Slurm
job_name: "GusmapReport"
mem:"~{memory_size}M"
- time:"24:00:00"
+ time: 24
}
meta {
@@ -200,6 +202,7 @@ task CompressGusmap {
runtime {
docker:"ubuntu:20.04"
+ singularity: "docker://ubuntu:20.04"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -207,7 +210,7 @@ task CompressGusmap {
# Slurm
job_name: "CompressGusmap"
mem:"~{memory_size}M"
- time:"01:00:00"
+ time: 1
}
meta {
@@ -244,6 +247,7 @@ task CompressGusmapSimu {
runtime {
docker:"ubuntu:20.04"
+ singularity: "docker://ubuntu:20.04"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -251,7 +255,7 @@ task CompressGusmapSimu {
# Slurm
job_name: "CompressGusmap"
mem:"~{memory_size}M"
- time:"01:00:00"
+ time: 1
}
meta {
diff --git a/tasks/mappoly.wdl b/tasks/mappoly.wdl
index f5bc1d8c..377d2d5b 100644
--- a/tasks/mappoly.wdl
+++ b/tasks/mappoly.wdl
@@ -136,6 +136,7 @@ task MappolyReport {
runtime {
docker:"cristaniguti/reads2map:0.0.5"
+ singularity: "docker://cristaniguti/reads2map:0.0.5"
cpu: max_cores
# Cloud
memory:"~{memory_size} MiB"
@@ -143,7 +144,7 @@ task MappolyReport {
# Slurm
job_name: "MappolyReport"
mem:"~{memory_size}G"
- time:"24:00:00"
+ time: 24
}
meta {
@@ -153,6 +154,6 @@ task MappolyReport {
}
output {
- File results = "~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_results.tar.gz"
+ File results = "~{SNPCall_program}_~{GenotypeCall_program}Poly_~{CountsFrom}_results.tar.gz"
}
}
\ No newline at end of file
diff --git a/tasks/mchap.wdl b/tasks/mchap.wdl
index e5486d46..57526da8 100644
--- a/tasks/mchap.wdl
+++ b/tasks/mchap.wdl
@@ -13,7 +13,7 @@ task OneMCHap {
}
Int disk_size = ceil(size(bams, "GiB") * 1.5 + size(bed, "GiB") * 1.5 + size(vcf_file, "GiB") * 1.5 + size(reference, "GiB"))
- Int memory_size = 3000
+ Int memory_size = 3000 + ceil(size(bams, "MiB") * 1.5 + size(bed, "MiB") * 1.5 + size(vcf_file, "MiB") * 1.5 + size(reference, "MiB"))
command <<<
@@ -46,6 +46,7 @@ task OneMCHap {
runtime {
docker: "cristaniguti/mchap:0.0.1"
+ singularity: "docker://cristaniguti/mchap:0.0.1"
cpu: max_cores
# Cloud
memory:"~{memory_size} MiB"
@@ -53,7 +54,7 @@ task OneMCHap {
# Slurm
job_name: "MCHap"
mem:"~{memory_size}M"
- time:"24:00:00"
+ time: 24
}
meta {
@@ -77,7 +78,7 @@ task OneMCHap_recall {
}
Int disk_size = ceil(size(bams, "GiB") * 1.25 + size(vcf_file, "GiB") * 1.5)
- Int memory_size = 3000
+ Int memory_size = 3000 + ceil(size(bams, "MiB") * 1.25 + size(vcf_file, "MiB") * 1.5)
command <<<
@@ -104,6 +105,7 @@ task OneMCHap_recall {
runtime {
docker: "cristaniguti/mchap:0.0.1"
+ singularity: "docker://cristaniguti/mchap:0.0.1"
cpu: max_cores
# Cloud
memory:"~{memory_size} MiB"
@@ -111,7 +113,7 @@ task OneMCHap_recall {
# Slurm
job_name: "OneMCHap_recall"
mem:"~{memory_size}M"
- time:"24:00:00"
+ time: 24
}
meta {
diff --git a/tasks/pedigree_simulator.wdl b/tasks/pedigree_simulator.wdl
index 49d8ce3a..a1f56744 100644
--- a/tasks/pedigree_simulator.wdl
+++ b/tasks/pedigree_simulator.wdl
@@ -23,6 +23,7 @@ task RunPedigreeSimulator {
runtime {
docker: "cristaniguti/java-in-the-cloud:0.0.1"
+ singularity: "docker://cristaniguti/java-in-the-cloud:0.0.1"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -30,7 +31,7 @@ task RunPedigreeSimulator {
# Slurm
job_name: "RunPedigreeSimulator"
mem:"~{memory_size}M"
- time:"05:00:00"
+ time: 5
}
meta {
diff --git a/tasks/pedigree_simulator_utils.wdl b/tasks/pedigree_simulator_utils.wdl
index 669c7ffb..975d36bb 100644
--- a/tasks/pedigree_simulator_utils.wdl
+++ b/tasks/pedigree_simulator_utils.wdl
@@ -173,6 +173,7 @@ task CreatePedigreeSimulatorInputs {
runtime {
docker:"cristaniguti/reads2map:0.0.4"
+ singularity: "docker://cristaniguti/reads2map:0.0.4"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -180,7 +181,7 @@ task CreatePedigreeSimulatorInputs {
# Slurm
job_name: "CreatePedigreeSimulatorInputs"
mem:"~{memory_size}M"
- time:"05:00:00"
+ time: 5
}
meta {
@@ -277,6 +278,7 @@ task ConvertPedigreeSimulationToVcf {
runtime {
docker: "cristaniguti/reads2map:0.0.4"
+ singularity: "docker://cristaniguti/reads2map:0.0.4"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -284,7 +286,7 @@ task ConvertPedigreeSimulationToVcf {
# Slurm
job_name: "ConvertPedigreeSimulationToVcf"
mem:"~{memory_size}M"
- time:"05:00:00"
+ time: 5
}
meta {
@@ -349,6 +351,7 @@ task Vcf2PedigreeSimulator{
runtime {
docker:"cristaniguti/reads2map:0.0.4"
+ singularity: "docker://cristaniguti/reads2map:0.0.4"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -356,7 +359,7 @@ task Vcf2PedigreeSimulator{
# Slurm
job_name: "Vcf2PedigreeSimulator"
mem:"~{memory_size}M"
- time:"05:00:00"
+ time: 5
}
meta {
@@ -395,6 +398,7 @@ task ProduceFamiliesSeeds {
runtime {
docker:"python:3.7"
+ singularity: "docker://python:3.7"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -402,7 +406,7 @@ task ProduceFamiliesSeeds {
# Slurm
job_name: "ProduceFamiliesSeeds"
mem:"~{memory_size}M"
- time:"01:00:00"
+ time: 1
}
output {
diff --git a/tasks/pirs.wdl b/tasks/pirs.wdl
index 7fde5ef0..9d2fb76f 100644
--- a/tasks/pirs.wdl
+++ b/tasks/pirs.wdl
@@ -16,6 +16,7 @@ task GenerateAlternativeGenome {
runtime {
docker: "cristaniguti/pirs-ddrad-cutadapt:0.0.1"
+ singularity: "docker://cristaniguti/pirs-ddrad-cutadapt:0.0.1"
cpu:1
# Cloud
memory:"3000 MiB"
@@ -23,7 +24,7 @@ task GenerateAlternativeGenome {
# Slurm
job_name: "GenerateAlternativeGenome"
mem:"3000M"
- time:"01:00:00"
+ time: 1
}
meta {
diff --git a/tasks/radinitio.wdl b/tasks/radinitio.wdl
index af6a1183..d2cbbce5 100644
--- a/tasks/radinitio.wdl
+++ b/tasks/radinitio.wdl
@@ -99,6 +99,7 @@ task RADinitioSimulation {
runtime {
docker: "cristaniguti/radinitio:0.0.1"
+ singularity: "docker://cristaniguti/radinitio:0.0.1"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -108,7 +109,7 @@ task RADinitioSimulation {
# Slurm
job_name: "RADinitioSimulation"
mem:"~{memory_size}M"
- time:"05:00:00"
+ time: 5
}
meta {
diff --git a/tasks/simuscop.wdl b/tasks/simuscop.wdl
index 1f588f17..62320687 100644
--- a/tasks/simuscop.wdl
+++ b/tasks/simuscop.wdl
@@ -37,6 +37,7 @@ task SimuscopProfile {
runtime {
docker: "cristaniguti/reads2map:0.0.4"
+ singularity: "docker://cristaniguti/reads2map:0.0.4"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -44,7 +45,7 @@ task SimuscopProfile {
# Slurm
job_name: "SimuscopProfile"
mem:"~{memory_size}M"
- time:"05:00:00"
+ time: 5
}
meta {
@@ -114,6 +115,7 @@ task SimuscopSimulation {
runtime {
docker: "cristaniguti/reads2map:0.0.4"
+ singularity: "docker://cristaniguti/reads2map:0.0.4"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -121,7 +123,7 @@ task SimuscopSimulation {
# Slurm
job_name: "SimuscopSimulation"
mem:"~{memory_size}M"
- time:"10:00:00"
+ time: 10
}
meta {
diff --git a/tasks/stacks.wdl b/tasks/stacks.wdl
index 23c8117a..e2fa5fb2 100644
--- a/tasks/stacks.wdl
+++ b/tasks/stacks.wdl
@@ -9,7 +9,7 @@ task ProcessRadTags {
}
Int disk_size = ceil(size(fq_files, "GiB") * 2)
- Int memory_size = 6000
+ Int memory_size = 4000 + ceil(size(fq_files, "MiB") * 2)
command <<<
mkdir raw process_radtags_results
@@ -24,6 +24,7 @@ task ProcessRadTags {
runtime {
docker:"cristaniguti/stacks:0.0.1"
+ singularity: "docker://cristaniguti/stacks:0.0.1"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -31,10 +32,100 @@ task ProcessRadTags {
# Slurm
job_name: "ProcessRadTags"
mem:"~{memory_size}M"
- time:"10:00:00"
+ time: 10
}
output {
Array[File] seq_results = glob("process_radtags_results/*.fq.gz")
}
}
+
+task CreatePopMapFile {
+ input {
+ Array[File] bams
+ }
+
+ command <<<
+ R --vanilla --no-save <>>
+
+ runtime {
+ docker:"cristaniguti/r-samtools:latest"
+ singularity: "docker://cristaniguti/r-samtools:latest"
+ cpu:1
+ # Cloud
+ memory:"1000 MiB"
+ disks:"local-disk 1 GiB HDD"
+ # Slurm
+ job_name: "CreatePopMapFile"
+ mem:"1000M"
+ time: 10
+ }
+
+ meta {
+ author: "Cristiane Taniguti"
+ email: "chtaniguti@tamu.edu"
+    description: "Creates the population map file (pop_map.tsv) required by Stacks from the input BAM files"
+ }
+
+ output {
+ File pop_map = "pop_map.tsv"
+ }
+}
+
+task RefMap {
+ input {
+ Array[File] bams
+ File pop_map
+ Int max_cores
+ }
+
+ Int disk_size = ceil(size(bams, "GiB") * 2)
+ Int memory_size = 4000 + ceil(size(bams, "MiB") * 2)
+
+ command <<<
+
+ mkdir aligned stacks
+ ln -s ~{sep=" " bams} aligned
+
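+    # gstacks assembles loci from the aligned BAMs; populations then exports the SNP and haplotype VCFs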
+ gstacks -I aligned/ -M ~{pop_map} -O stacks/ -t ~{max_cores}
+
+ populations -P stacks/ -M ~{pop_map} --vcf -t ~{max_cores}
+
+ >>>
+
+ runtime {
+ docker:"cristaniguti/stacks:0.0.1"
+ singularity: "docker://cristaniguti/stacks:0.0.1"
+    cpu: max_cores
+ # Cloud
+ memory:"~{memory_size} MiB"
+ disks:"local-disk " + disk_size + " HDD"
+ # Slurm
+    job_name: "RefMap"
+ mem:"~{memory_size}M"
+ time: 10
+ }
+
+ meta {
+ author: "Cristiane Taniguti"
+ email: "chtaniguti@tamu.edu"
+    description: "Runs Stacks gstacks on the aligned BAMs and populations to call SNPs and export the SNP and haplotype VCFs"
+ }
+
+ output {
+ File stacks_vcf = "stacks/populations.snps.vcf"
+ File stacks_multiallelics = "stacks/populations.haps.vcf"
+ }
+}
diff --git a/tasks/tassel.wdl b/tasks/tassel.wdl
new file mode 100644
index 00000000..daf78c3e
--- /dev/null
+++ b/tasks/tassel.wdl
@@ -0,0 +1,323 @@
+version 1.0
+
+
+task transposeSamples {
+ input {
+ File families_info
+ }
+
+ command <<<
+ R --vanilla --no-save <>>
+
+ runtime {
+ docker: "cristaniguti/r-samtools:latest"
+ singularity: "docker://cristaniguti/r-samtools:latest"
+ cpu: 1
+ # Cloud
+ memory:"1 GiB"
+ disks:"local-disk 3 GiB HDD"
+ # Slurm
+ job_name: "transposeSamples"
+ mem:"1G"
+ time: 4
+ }
+
+ meta {
+ author: "Cristiane Taniguti"
+ email: "chtaniguti@tamu.edu"
+    description: "Transposes the sample information table"
+ }
+
+ output {
+ File transpose_samples = "transpose_sample.tsv"
+ }
+}
+
+task BarcodeFaker {
+ input {
+ Array[File] fastq
+ Array[String] FullSampleName
+ }
+
+ command <<<
+ R --vanilla --no-save < 0]) # subset the real and positive roots, and round up
+ if(r < 4){r <- 4} # keep a minimum barcode length of 4, probably unnecessary
+
+ fake_barcodes <- barcode_inventor(fake_barcodes, r, fastqs)
+
+ #make the fake perfect quality scores matching the barcode length
+ fake_quals <- rep(paste(rep("E", times = r), collapse = ""), times = length(fastqs))
+
+
+ #make fake header files for FASTQ reads
+ #headers are totally fake (do not refer to input file)
+    #fake headers allow interoperability between TASSEL and demultiplexing software FASTQ formats
+ fake_flowcell_no <- seq(from = 1, to = length(fastqs), by = 1)
+ fake_headers <- paste("D00553R:56:", "C8B56ANXX", fake_flowcell_no, ":3:1101:1203:2037 1:N:0:3", sep = "")
+ fake_flowcells <- paste("C8B56ANXX", fake_flowcell_no, sep = "")
+ fake_lanes <- rep(3, times = length(fastqs))
+
+ #make fake file names that appropriately reference fake flowcell and lane
+ newfile_names <- paste(fake_flowcells, "_", "3","_","fastq.fastq", sep = "")
+
+ #stop if files with the fake file names already exist
+ workdir <- getwd()
+ dirfil <- list.files(path = workdir, full.names = FALSE)
+ if(sum(newfile_names %in% dirfil) != 0){
+ stop("Files in your working directory seem to have same names as those
+ output by this function. Please move them, delete them, or choose a new working directory.")
+ }
+
+ #write a key of barcode, flowcell, lane and new file names
+ key <- cbind(Flowcell = fake_flowcells, Lane = fake_lanes, Barcode = fake_barcodes, FullSampleName)
+ write.table(key, "fakebarcodes_key.txt", row.names = FALSE, sep = "\t", quote = FALSE)
+
+ #read 400 lines per file at a time, paste on the barcodes and quality scores, make unique flowcells,
+ #and write them out with unique names
+ for(i in 1:length(fastqs)){
+ incon <- file(fastqs[i], open = "r")
+ outcon <- file(newfile_names[i], open = "w")
+ while(length(mylines <- readLines(incon, n = 400, warn = FALSE))){
+ head_pos <- seq(1, length(mylines), by = 4)
+ seq_pos <- seq(2, length(mylines), by = 4)
+ qual_pos <- seq(4, length(mylines), by = 4)
+
+ for(j in 1:length(mylines)){
+ mylines[head_pos[j]] <- fake_headers[i]
+ mylines[seq_pos[j]] <- paste(fake_barcodes[i], mylines[seq_pos[j]], sep = "")
+ mylines[qual_pos[j]] <- paste(fake_quals[i], mylines[qual_pos[j]], sep = "")
+ }
+ writeLines(mylines, outcon)
+ }
+ print(paste("Finished writing File", i, "...", sep = " "))
+ #close the connections
+ close(outcon, warn = FALSE)
+ close(incon, warn = FALSE)
+ }
+ }
+
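+    # ~{sep=','} joins each WDL Array input into a single comma-separated string, which R splits back into a vector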
+ file_names <- "~{sep=',' fastq}"
+ file_names <- unlist(strsplit(file_names, ","))
+ is_gz <- basename(file_names[1])
+    if(grepl("gz", is_gz)) system(paste("gunzip", paste(file_names, collapse = " ")))
+ dir_name <- dirname(file_names[1])
+
+ sample_names <- "~{sep=',' FullSampleName}"
+ sample_names <- unlist(strsplit(sample_names, ","))
+
+ barcode_faker(fastq_dir = dir_name, FullSampleName = sample_names)
+
+ RSCRIPT
+ >>>
+
+ runtime {
+ docker: "cristaniguti/r-samtools:latest"
+ singularity: "docker://cristaniguti/r-samtools:latest"
+ cpu: 1
+ # Cloud
+ memory:"2 GiB"
+ disks:"local-disk 3 GiB HDD"
+ # Slurm
+ job_name: "BarcodeFaker"
+ mem:"2G"
+ time: 4
+ }
+
+ meta {
+    author: "Marlee Labroo; function adapted by Cristiane Taniguti"
+ email: "chtaniguti@tamu.edu"
+ description: "Add fake barcode adaptors to fastq sequences to be input for TASSEL"
+ }
+
+ output {
+ File key_file = "fakebarcodes_key.txt"
+ Array[File] barcode_fastq = glob("*.fastq")
+ }
+}
+
+task TasselBeforeAlign {
+ input {
+ Array[File] fastq
+ String enzyme = "ApeKI"
+ File key_file
+ Int max_ram = 10000
+ }
+
+ Int disk_size = ceil(size(fastq, "GiB"))
+ Int memory_min = ceil(max_ram/2)
+
+ command <<<
+
+ mkdir fastqs
+ ln -s ~{sep=" " fastq} fastqs
+
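+    # Identify tags and the taxa in which they appear, and store them in the local GBSV2.db database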
+ /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{max_ram}m -fork1 -GBSSeqToTagDBPlugin -e ~{enzyme} -i fastqs \
+ -db GBSV2.db \
+ -k ~{key_file} \
+ -kmerLength 64 -minKmerL 20 -mnQS 20 -mxKmerNum 100000000 -endPlugin -runfork1
+
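+    # Export the distinct tags from the database as a FASTQ file to be aligned against the reference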
+ /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{max_ram}m -fork1 -TagExportToFastqPlugin -db GBSV2.db \
+ -o tagsForAlign.fa.gz -c 1 -endPlugin -runfork1
+
+ >>>
+
+ runtime {
+ docker: "cristaniguti/java-in-the-cloud:0.0.2"
+ singularity: "docker://cristaniguti/java-in-the-cloud:0.0.2"
+ cpu: 1
+ # Cloud
+ memory:"~{max_ram} MiB"
+ disks:"local-disk " + disk_size + " HDD"
+ # Slurm
+ job_name: "GBSSeqToTagDBPlugin"
+ mem:"~{max_ram}M"
+ time: 24
+ }
+
+ meta {
+ author: "Cristiane Taniguti"
+ email: "chtaniguti@tamu.edu"
+    description: "GBSSeqToTagDBPlugin takes FASTQ files as input, identifies tags and the taxa in which they appear, and stores this data in a local database. TagExportToFastqPlugin then retrieves the distinct tags from the database and reformats them as a FASTQ file for alignment"
+ }
+
+ output {
+ File tassel_database = "GBSV2.db"
+ File fastq_align = "tagsForAlign.fa.gz"
+ }
+}
+
+task TasselAfterAlign {
+ input {
+ File tassel_database
+ File bam
+ Int max_ram = 10000
+ String enzyme
+ File key_file
+ Array[File] fastq
+ }
+
+ Int disk_size = ceil(size(tassel_database, "GiB"))
+ Int memory_min = ceil(max_ram/2)
+
+ command <<<
+
+ samtools view -h ~{bam} > file.sam
+ mv ~{tassel_database} .
+
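+    # Store the alignment positions of the tags (from the SAM file) in the TASSEL database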
+ /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{max_ram}m -fork1 -SAMToGBSdbPlugin \
+ -i file.sam \
+ -db GBSV2.db \
+ -aProp 0.0 -aLen 0 -endPlugin -runfork1
+
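+    # Identify SNPs from the aligned tags stored in the database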
+ /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{max_ram}m -fork1 -DiscoverySNPCallerPluginV2 \
+ -db GBSV2.db \
+ -mnLCov 0.1 -mnMAF 0.01 -deleteOldData true -endPlugin -runfork1
+
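+    # Profile quality statistics for the discovered SNPs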
+ /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{max_ram}m -fork1 -SNPQualityProfilerPlugin \
+ -db GBSV2.db \
+ -deleteOldData true -endPlugin -runfork1
+
+ mkdir fastqs
+ ln -s ~{sep=" " fastq} fastqs
+
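+    # Genotype the samples at the discovered SNPs and write the final tassel.vcf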
+ /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{max_ram}m -fork1 -ProductionSNPCallerPluginV2 \
+ -db GBSV2.db \
+ -e ~{enzyme} -i fastqs \
+ -k ~{key_file} \
+ -kmerLength 64 \
+ -o tassel.vcf -endPlugin -runfork1
+
+ >>>
+
+ runtime {
+ docker: "cristaniguti/java-in-the-cloud:0.0.2"
+ singularity: "docker://cristaniguti/java-in-the-cloud:0.0.2"
+ cpu: 1
+ # Cloud
+ memory:"~{max_ram} MiB"
+ disks:"local-disk " + disk_size + " HDD"
+ # Slurm
+ job_name: "SAMToGBSdbPlugin"
+ mem:"~{max_ram}M"
+ time: 24
+ }
+
+ meta {
+ author: "Cristiane Taniguti"
+ email: "chtaniguti@tamu.edu"
+    description: "Reads the SAM file to determine the positions of tags against the reference genome, identifies SNPs from the aligned tags, and genotypes the samples into a VCF"
+ }
+
+ output {
+ File tassel_vcf = "tassel.vcf"
+ }
+
+}
diff --git a/tasks/utils.wdl b/tasks/utils.wdl
index ee3fd387..e98428a5 100644
--- a/tasks/utils.wdl
+++ b/tasks/utils.wdl
@@ -6,7 +6,7 @@ task mergeVCFs {
}
Int disk_size = ceil(size(haplo_vcf, "GiB") * 2)
- Int memory_size = 5000
+ Int memory_size = ceil(3000 + size(haplo_vcf, "MiB") * 2)
command <<<
@@ -32,6 +32,7 @@ task mergeVCFs {
runtime {
docker:"lifebitai/bcftools:1.10.2"
+ singularity: "docker://lifebitai/bcftools:1.10.2"
cpu: 1
# Cloud
memory:"~{memory_size} MiB"
@@ -39,7 +40,7 @@ task mergeVCFs {
# Slurm
job_name: "mergeVCFs"
mem:"~{memory_size}M"
- time:"01:00:00"
+ time: 1
}
meta {
@@ -62,7 +63,7 @@ task GenerateSampleNames { # TODO: probably a name like 'ReadSamplesNamesInVcf'
}
Int disk_size = ceil(size(simulated_vcf, "GiB") * 2)
- Int memory_size = 1000
+ Int memory_size = ceil(1000 + size(simulated_vcf, "MiB") * 2)
command <<<
export PATH=$PATH:/opt/conda/bin
@@ -80,6 +81,7 @@ task GenerateSampleNames { # TODO: probably a name like 'ReadSamplesNamesInVcf'
runtime {
docker: "cristaniguti/miniconda-alpine:0.0.1"
+ singularity: "docker://cristaniguti/miniconda-alpine:0.0.1"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -87,7 +89,7 @@ task GenerateSampleNames { # TODO: probably a name like 'ReadSamplesNamesInVcf'
# Slurm
job_name: "GenerateSampleNames"
mem:"~{memory_size}M"
- time:"05:00:00"
+ time: 5
}
meta {
@@ -112,7 +114,7 @@ task ApplyRandomFilters {
}
Int disk_size = ceil(size(gatk_vcf, "GiB") * 2 + size(freebayes_vcf, "GiB") * 2 + size(gatk_vcf_bam_counts, "GiB") * 2 + size(freebayes_vcf_bam_counts, "GiB") * 2)
- Int memory_size = 3000
+ Int memory_size = ceil(3000 + size(gatk_vcf, "MiB") * 2 + size(freebayes_vcf, "MiB") * 2 + size(gatk_vcf_bam_counts, "MiB") * 2 + size(freebayes_vcf_bam_counts, "MiB") * 2)
command <<<
# Required update to deal with polyploids
@@ -128,6 +130,7 @@ task ApplyRandomFilters {
runtime {
docker:"cristaniguti/split_markers:0.0.1"
+ singularity: "docker://cristaniguti/split_markers:0.0.1"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -135,7 +138,7 @@ task ApplyRandomFilters {
# Slurm
job_name: "ApplyRandomFilters"
mem:"~{memory_size}M"
- time:"01:00:00"
+ time: 1
}
meta {
@@ -155,34 +158,44 @@ task ApplyRandomFilters {
task ApplyRandomFiltersArray {
input{
Array[File] vcfs
- Array[String] vcfs_software
- Array[String] vcfs_counts_source
+ Array[String] vcfs_SNPCall_software
+ Array[String] vcfs_Counts_source
+ Array[String] vcfs_GenoCall_software
String? filters
String? chromosome
}
Int disk_size = ceil(size(vcfs, "GiB") * 2)
- Int memory_size = 3000
+ Int memory_size = ceil(3000 + size(vcfs, "MiB") * 2)
command <<<
vcfs=(~{sep = " " vcfs})
- vcfs_software=(~{sep=" " vcfs_software})
- vcfs_counts_source=(~{sep=" " vcfs_counts_source})
+ vcfs_snp_software=(~{sep=" " vcfs_SNPCall_software})
+ vcfs_counts_source=(~{sep=" " vcfs_Counts_source})
+ vcfs_geno_software=(~{sep=" " vcfs_GenoCall_software})
for index in ${!vcfs[*]}; do
- cp ${vcfs[$index]} temp.vcf
- tabix -p vcf temp.vcf
- bcftools view temp.vcf ~{filters} -r ~{chromosome} \
- -o vcf_filt_${vcfs_software[$index]}_${vcfs_counts_source[$index]}.vcf.gz
- rm temp.vcf temp.vcf.tbi
- echo vcf_filt_${vcfs_software[$index]}_${vcfs_counts_source[$index]}.vcf.gz >> outputs.txt
+ if [[ ${vcfs[$index]} != *.gz ]]; then
+ cp ${vcfs[$index]} temp.vcf
+ bgzip temp.vcf
+ else
+ cp ${vcfs[$index]} temp.vcf.gz
+ fi
+
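+      # Index the compressed VCF so bcftools can apply the filters and region subset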
+ tabix -p vcf temp.vcf.gz
+ bcftools view temp.vcf.gz ~{filters} ~{" -r " + chromosome} \
+ -o vcf_filt_${vcfs_snp_software[$index]}_${vcfs_counts_source[$index]}_${vcfs_geno_software[$index]}.vcf
+ bgzip vcf_filt_${vcfs_snp_software[$index]}_${vcfs_counts_source[$index]}_${vcfs_geno_software[$index]}.vcf
+ rm temp.vcf.gz temp.vcf.gz.tbi
+ echo vcf_filt_${vcfs_snp_software[$index]}_${vcfs_counts_source[$index]}_${vcfs_geno_software[$index]}.vcf.gz >> outputs.txt
done
>>>
runtime {
docker:"lifebitai/bcftools:1.10.2"
+ singularity: "docker://lifebitai/bcftools:1.10.2"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -190,7 +203,7 @@ task ApplyRandomFiltersArray {
# Slurm
job_name: "ApplyRandomFilters"
mem:"~{memory_size}M"
- time:"01:00:00"
+ time: 1
}
meta {
@@ -211,7 +224,7 @@ task SplitMarkers {
}
Int disk_size = ceil(size(vcf_file, "GiB") * 2)
- Int memory_size = 3000
+ Int memory_size = ceil(3000 + size(vcf_file, "MiB") * 2)
command <<<
bcftools view --max-alleles 2 --min-alleles 2 --output-type z --output-file biallelics.vcf.gz ~{vcf_file}
@@ -220,6 +233,7 @@ task SplitMarkers {
runtime {
docker:"lifebitai/bcftools:1.10.2"
+ singularity: "docker://lifebitai/bcftools:1.10.2"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -227,7 +241,7 @@ task SplitMarkers {
# Slurm
job_name: "SplitMarkers"
mem:"~{memory_size}M"
- time:"01:00:00"
+ time: 1
}
meta {
@@ -244,12 +258,15 @@ task SplitMarkers {
task JointMarkers {
input{
+ String SNPCall_program
+ String CountsFrom
+ String GenotypeCall_program
File biallelic_vcf
File? multiallelic_vcf
}
Int disk_size = ceil(size(biallelic_vcf, "GiB") * 2 + size(multiallelic_vcf, "GiB") * 2)
- Int memory_size = 3000
+ Int memory_size = ceil(3000 + size(biallelic_vcf, "MiB") * 2 + size(multiallelic_vcf, "MiB") * 2)
command <<<
@@ -270,13 +287,14 @@ task JointMarkers {
tabix -p vcf ~{multiallelic_vcf}
bcftools view -S samples.txt ~{multiallelic_vcf} > multiallelic_sort.vcf
- bcftools concat biallelic_sort.vcf multiallelic_sort.vcf --output merged.vcf
- bgzip merged.vcf
+ bcftools concat biallelic_sort.vcf multiallelic_sort.vcf --output ~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}.vcf
+ bgzip ~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}.vcf
>>>
runtime {
docker:"lifebitai/bcftools:1.10.2"
+ singularity: "docker://lifebitai/bcftools:1.10.2"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -284,7 +302,7 @@ task JointMarkers {
# Slurm
job_name: "JointMarkers"
mem:"~{memory_size}M"
- time:"01:00:00"
+ time: 1
}
meta {
@@ -294,7 +312,7 @@ task JointMarkers {
}
output {
- File merged_vcf = "merged.vcf.gz"
+ File merged_vcf = "~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}.vcf.gz"
}
}
@@ -340,6 +358,7 @@ task ReplaceAD {
runtime {
docker:"lifebitai/bcftools:1.10.2"
+ singularity: "docker://lifebitai/bcftools:1.10.2"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -347,7 +366,7 @@ task ReplaceAD {
# Slurm
job_name: "ReplaceAD"
mem:"~{memory_size}M"
- time:"24:00:00"
+ time: 24
}
meta {
@@ -375,7 +394,7 @@ task Compress {
}
Int disk_size = ceil(size(RDatas, "GiB") + size(maps_report, "GiB") + size(times, "GiB") + size(filters_report, "GiB") + size(errors_report, "GiB"))
- Int memory_size = 1000
+ Int memory_size = ceil(2000 + size(RDatas, "MiB") + size(maps_report, "MiB") + size(times, "MiB") + size(filters_report, "MiB") + size(errors_report, "MiB"))
command <<<
@@ -391,6 +410,7 @@ task Compress {
runtime {
docker:"ubuntu:20.04"
+ singularity: "docker://ubuntu:20.04"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -398,7 +418,7 @@ task Compress {
# Slurm
job_name: "Compress"
mem:"~{memory_size}M"
- time:"01:00:00"
+ time: 1
}
meta {
@@ -425,7 +445,7 @@ task GetMarkersPos {
}
Int disk_size = ceil(size(true_vcf, "GiB") * 1.5 + size(filtered_gatk_vcf, "GiB") * 1.5 + size(filtered_gatk_vcf_bamcounts, "GiB") + size(filtered_freebayes_vcf, "GiB") + size(filtered_freebayes_vcf_bamcounts, "GiB"))
- Int memory_size = 5000
+ Int memory_size = ceil(3000 + size(true_vcf, "MiB") * 1.5 + size(filtered_gatk_vcf, "MiB") * 1.5 + size(filtered_gatk_vcf_bamcounts, "MiB") + size(filtered_freebayes_vcf, "MiB") + size(filtered_freebayes_vcf_bamcounts, "MiB"))
command <<<
@@ -442,6 +462,7 @@ task GetMarkersPos {
runtime {
docker:"lifebitai/bcftools:1.10.2"
+ singularity: "docker://lifebitai/bcftools:1.10.2"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -449,7 +470,7 @@ task GetMarkersPos {
# Slurm
job_name: "GetMarkerPos"
mem:"~{memory_size}M"
- time:"01:00:00"
+ time: 1
}
meta {
@@ -469,7 +490,7 @@ task MergeBams{
}
Int disk_size = ceil(size(bam_files, "GiB") * 2)
- Int memory_size = ceil(size(bam_files, "MiB") * 2)
+ Int memory_size = ceil(3000 + size(bam_files, "MiB") * 2)
command <<<
samtools merge merged.bam ~{sep=" " bam_files}
@@ -477,6 +498,7 @@ task MergeBams{
runtime {
docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
+ singularity: "docker://us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -484,7 +506,7 @@ task MergeBams{
# Slurm
job_name: "MergeBams"
mem:"~{memory_size}M"
- time:"10:00:00"
+ time: 10
}
meta {
@@ -505,7 +527,7 @@ task TarFiles {
}
Int disk_size = ceil(size(sequences, "GiB") * 2)
- Int memory_size = 6000
+ Int memory_size = ceil(4000 + size(sequences, "MiB") * 2)
command <<<
mkdir results
@@ -515,6 +537,7 @@ task TarFiles {
runtime {
docker:"kfdrc/cutadapt"
+ singularity: "docker://kfdrc/cutadapt"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -522,7 +545,7 @@ task TarFiles {
# Slurm
job_name: "TarFiles"
mem:"~{memory_size}M"
- time:"10:00:00"
+ time: 10
}
output {
@@ -540,7 +563,7 @@ task VariantFiltration {
}
Int disk_size = ceil(size(vcf_file, "GB") + size(reference, "GB") + 1)
- Int memory_size = 5000
+ Int memory_size = ceil(5000 + size(vcf_file, "MiB") * 2)
command <<<
/usr/gitc/gatk4/./gatk VariantFiltration \
@@ -564,6 +587,7 @@ task VariantFiltration {
runtime {
docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
+ singularity: "docker://us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.5.7-2021-06-09_16-47-48Z"
cpu: 1
# Cloud
memory:"~{memory_size} MiB"
@@ -571,7 +595,7 @@ task VariantFiltration {
# Slurm
job_name: "VariantFiltration"
mem:"~{memory_size}M"
- time:"01:00:00"
+ time: 1
}
meta {
@@ -592,7 +616,7 @@ task BamToBed {
}
Int disk_size = ceil(size(merged_bams, "GiB") * 1.5)
- Int memory_size = 3000
+ Int memory_size = ceil(3000 + size(merged_bams, "MiB") * 1.5)
command <<<
bamToBed -i ~{merged_bams} > file.bed
@@ -601,6 +625,7 @@ task BamToBed {
runtime {
docker: "biocontainers/bedtools:v2.27.1dfsg-4-deb_cv1"
+ singularity: "docker://biocontainers/bedtools:v2.27.1dfsg-4-deb_cv1"
cpu: 1
# Cloud
memory:"~{memory_size} MiB"
@@ -608,7 +633,7 @@ task BamToBed {
# Slurm
job_name: "BamToBed"
mem:"~{memory_size}M"
- time:"05:00:00"
+ time: 5
}
meta {
diff --git a/tasks/utilsR.wdl b/tasks/utilsR.wdl
index e30cb8f8..a03061a9 100644
--- a/tasks/utilsR.wdl
+++ b/tasks/utilsR.wdl
@@ -45,6 +45,7 @@ task vcf2onemap {
runtime {
docker:"cristaniguti/reads2map:0.0.4"
+ singularity: "docker://cristaniguti/reads2map:0.0.4"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -52,7 +53,7 @@ task vcf2onemap {
# Slurm
job_name: "vcf2onemap"
mem:"~{memory_size}M"
- time:"10:00:00"
+ time: 10
}
meta {
@@ -99,6 +100,7 @@ task FiltersReport {
runtime {
docker: "cristaniguti/reads2map:0.0.4"
+ singularity:"docker://cristaniguti/reads2map:0.0.4"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -106,7 +108,7 @@ task FiltersReport {
# Slurm
job_name: "Filters"
mem:"~{memory_size}M"
- time:"05:00:00"
+ time: 5
}
meta {
@@ -151,6 +153,7 @@ task FiltersReportEmp {
runtime {
docker: "cristaniguti/reads2map:0.0.4"
+ singularity:"docker://cristaniguti/reads2map:0.0.4"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -158,7 +161,7 @@ task FiltersReportEmp {
# Slurm
job_name: "Filters"
mem:"~{memory_size}M"
- time:"05:00:00"
+ time: 5
}
meta {
@@ -257,6 +260,7 @@ task MapsReport {
runtime {
docker: "cristaniguti/reads2map:0.0.4"
+ singularity:"docker://cristaniguti/reads2map:0.0.4"
cpu:4
# Cloud
memory:"~{memory_size} MiB"
@@ -264,7 +268,7 @@ task MapsReport {
# Slurm
job_name: "MapsReport"
mem:"~{memory_size}M"
- time:"48:00:00"
+ time: 48
}
meta {
@@ -346,6 +350,7 @@ task ErrorsReport {
runtime {
docker: "cristaniguti/reads2map:0.0.4"
+ singularity:"docker://cristaniguti/reads2map:0.0.4"
cpu: max_cores
# Cloud
memory:"~{memory_size} MiB"
@@ -353,7 +358,7 @@ task ErrorsReport {
# Slurm
job_name: "ErrorsReport"
mem:"~{memory_size}M"
- time:"05:00:00"
+ time: 5
}
meta {
@@ -410,6 +415,7 @@ task CheckDepths {
runtime {
docker:"cristaniguti/reads2map:0.0.4"
+ singularity:"docker://cristaniguti/reads2map:0.0.4"
cpu: max_cores
# Cloud
memory:"~{memory_size} MiB"
@@ -417,7 +423,7 @@ task CheckDepths {
# Slurm
job_name: "CheckDepths"
mem:"~{memory_size}M"
- time:"10:00:00"
+ time: 10
}
meta {
@@ -473,6 +479,7 @@ task MapsReportEmp {
runtime {
docker:"cristaniguti/reads2map:0.0.4"
+ singularity:"docker://cristaniguti/reads2map:0.0.4"
cpu:max_cores
# Cloud
memory:"~{memory_size} MiB"
@@ -480,7 +487,7 @@ task MapsReportEmp {
# Slurm
job_name: "MapsEmp"
mem:"~{memory_size}M"
- time:"48:00:00"
+ time: 48
}
meta {
@@ -500,6 +507,8 @@ task MapsReportEmp {
# Exclusive for biallelic markers, input VCF file are normalized with -m-any
task ReGenotyping {
input {
+ String SNPCall_program
+ String CountsFrom
String GenotypeCall_program
File vcf_file
String cross
@@ -526,7 +535,7 @@ task ReGenotyping {
stop("Invalid crosstype")
}
- out_vcf <- "regeno.vcf.gz"
+ out_vcf <- "~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_regeno.vcf.gz"
if (method == "updog") {
updog_genotype_vcf(vcf="~{vcf_file}",
@@ -560,7 +569,7 @@ task ReGenotyping {
out_vcf = out_vcf)
}
- system("gunzip regeno.vcf.gz")
+ system("gunzip ~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_regeno.vcf.gz")
RSCRIPT
@@ -568,6 +577,7 @@ task ReGenotyping {
runtime {
docker:"cristaniguti/reads2map:0.0.5"
+ singularity:"docker://cristaniguti/reads2map:0.0.5"
cpu: max_cores
# Cloud
memory:"~{memory_size} MiB"
@@ -575,7 +585,7 @@ task ReGenotyping {
# Slurm
job_name: "ReGenotyping"
mem:"~{memory_size}M"
- time:"10:00:00"
+ time: 10
}
meta {
@@ -585,7 +595,7 @@ task ReGenotyping {
}
output {
- File regeno_vcf = "regeno.vcf"
+ File regeno_vcf = "~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_regeno.vcf"
}
}
@@ -654,6 +664,7 @@ task SetProbs {
>>>
runtime {
docker:"cristaniguti/reads2map:0.0.4"
+ singularity:"docker://cristaniguti/reads2map:0.0.4"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -661,7 +672,7 @@ task SetProbs {
# Slurm
job_name: "SetProbs"
mem:"~{memory_size}M"
- time:"10:00:00"
+ time: 10
}
meta {
@@ -745,6 +756,7 @@ task SetProbsDefault {
>>>
runtime {
docker:"cristaniguti/reads2map:0.0.4"
+ singularity:"docker://cristaniguti/reads2map:0.0.4"
cpu:1
# Cloud
memory:"~{memory_size} MiB"
@@ -752,7 +764,7 @@ task SetProbsDefault {
# Slurm
job_name: "SetProbsDefault"
mem:"~{memory_size}M"
- time:"10:00:00"
+ time: 10
}
meta {
@@ -778,7 +790,7 @@ task RemoveNonInformative {
}
Int disk_size = ceil(size(vcf_file, "GiB") * 2)
- Int memory_size = 3000
+ Int memory_size = ceil(3000 + size(vcf_file, "MiB") * 2)
command <<<
R --vanilla --no-save <