diff --git a/.dockerfiles/reads2map/Dockerfile b/.dockerfiles/reads2map/Dockerfile index fac7c35..e7406ae 100644 --- a/.dockerfiles/reads2map/Dockerfile +++ b/.dockerfiles/reads2map/Dockerfile @@ -16,7 +16,7 @@ RUN apt update \ libxml2-dev \ libnlopt-dev=2.4.2+dfsg-4 \ devscripts=2.17.12ubuntu1.1 \ - software-properties-common=0.96.24.32.18 \ + software-properties-common=0.96.24.32.22 \ libgit2-dev=0.26.0+dfsg.1-1.1ubuntu0.2 \ && pip install matplotlib==2.0.2 numpy @@ -53,7 +53,7 @@ RUN Rscript -e 'remotes::install_version("gsalib",upgrade="never", version = "2. RUN Rscript -e 'remotes::install_github("tpbilton/GUSbase", ref = "92119b9c57faa7abeede8236d24a4a8e85fb3df7")' RUN Rscript -e 'remotes::install_github("tpbilton/GUSMap", ref = "4d7d4057049819d045750d760a45976c8f30dac6")' -RUN Rscript -e 'remotes::install_github("dcgerard/updog")' +RUN Rscript -e 'remotes::install_github("dcgerard/updog", ref="f1")' RUN Rscript -e 'remotes::install_github("Cristianetaniguti/onemap")' diff --git a/.dockerfiles/stacks/Dockerfile b/.dockerfiles/stacks/Dockerfile index 60992ee..1c1b0f9 100644 --- a/.dockerfiles/stacks/Dockerfile +++ b/.dockerfiles/stacks/Dockerfile @@ -2,9 +2,9 @@ FROM ubuntu:18.04 RUN apt update \ && apt install -y build-essential wget zlib1g-dev \ - && wget http://catchenlab.life.illinois.edu/stacks/source/stacks-2.62.tar.gz \ - && tar xfvz stacks-2.62.tar.gz \ - && cd stacks-2.62 \ + && wget http://catchenlab.life.illinois.edu/stacks/source/stacks-2.64.tar.gz \ + && tar xfvz stacks-2.64.tar.gz \ + && cd stacks-2.64 \ && ./configure \ && make \ && make install \ No newline at end of file diff --git a/pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md b/pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md index 7460fc6..2580c16 100644 --- a/pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md +++ b/pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md @@ -1,3 +1,12 @@ +# 1.2.5 + +* more flexibility to choose the probability to be used in the HMM: + +* new parameters: 
+- global_errors: array with global errors to be tested +- genoprob_error: boolean defining if software probabilities should be tested +- genoprob_global_errors: array with global errors to be tested together with the software probabilities following: 1 - (1 - global error) x (1 - software error probability) + # 1.2.4 * runtimes adapted to run with Caper diff --git a/pipelines/EmpiricalMaps/EmpiricalMaps.inputs.json b/pipelines/EmpiricalMaps/EmpiricalMaps.inputs.json index c18ba56..3749e25 100644 --- a/pipelines/EmpiricalMaps/EmpiricalMaps.inputs.json +++ b/pipelines/EmpiricalMaps/EmpiricalMaps.inputs.json @@ -7,17 +7,24 @@ "cross": "String", "multiallelics": "Boolean" }, + "Maps.run_supermassa": "Boolean (optional, default = false)", "Maps.max_cores": "Int", "Maps.gatk_vcf_multi": "File? (optional)", "Maps.gatk_mchap": "String", "Maps.vcfs_counts_source": "Array[String]", - "Maps.filters": "String? (optional)", - "Maps.filt_segr": "String? (optional)", - "Maps.prob_thres": "Float? (optional)", + "Maps.run_gusmap": "Boolean (optional, default = false)", + "Maps.genoprob_error": "Boolean (optional, default = true)", + "Maps.prob_thres": "Float (optional, default = 0.8)", "Maps.ploidy": "Int", "Maps.vcfs_software": "Array[String]", "Maps.filter_noninfo": "Boolean", + "Maps.filters": "String? (optional)", + "Maps.global_errors": "Array[String] (optional, default = [\"0.05\"])", + "Maps.run_polyrad": "Boolean (optional, default = true)", "Maps.vcfs": "Array[File]", - "Maps.replaceADbyMissing": "String" + "Maps.replaceADbyMissing": "String", + "Maps.genoprob_global_errors": "Array[String] (optional, default = [\"0.05\"])", + "Maps.filt_segr": "String? 
(optional)", + "Maps.run_updog": "Boolean (optional, default = true)" } diff --git a/pipelines/EmpiricalMaps/EmpiricalMaps.wdl b/pipelines/EmpiricalMaps/EmpiricalMaps.wdl index 63630d3..90df7f2 100644 --- a/pipelines/EmpiricalMaps/EmpiricalMaps.wdl +++ b/pipelines/EmpiricalMaps/EmpiricalMaps.wdl @@ -25,14 +25,17 @@ workflow Maps { Boolean run_polyrad = true Boolean run_gusmap = false Boolean filter_noninfo - String replaceADbyMissing + Boolean replaceADbyMissing File? gatk_vcf_multi String gatk_mchap String? filters Int max_cores Int ploidy - Float? prob_thres + Float prob_thres = 0.8 String? filt_segr + Array[String] global_errors = ["0.05"] + Boolean genoprob_error = true + Array[String] genoprob_global_errors = ["0.05"] } if (defined(filters)) { @@ -42,7 +45,8 @@ workflow Maps { vcfs_SNPCall_software = vcfs_software, vcfs_Counts_source = vcfs_counts_source, vcfs_GenoCall_software = range(length(vcfs_software)), - filters = filters + filters = filters, + chromosome = dataset.chromosome } } @@ -84,7 +88,11 @@ workflow Maps { multiallelics = dataset.multiallelics, multiallelics_file = splitgeno.multiallelics, max_cores = max_cores, - ploidy = ploidy + ploidy = ploidy, + global_errors = global_errors, + genoprob_error = genoprob_error, + prob_thres = prob_thres, + genoprob_global_errors = genoprob_global_errors } } @@ -102,7 +110,11 @@ workflow Maps { multiallelics = dataset.multiallelics, multiallelics_file = splitgeno.multiallelics, max_cores = max_cores, - ploidy = ploidy + ploidy = ploidy, + global_errors = global_errors, + genoprob_error = genoprob_error, + prob_thres = prob_thres, + genoprob_global_errors = genoprob_global_errors } } @@ -120,7 +132,11 @@ workflow Maps { multiallelics = dataset.multiallelics, multiallelics_file = splitgeno.multiallelics, max_cores = max_cores, - ploidy = ploidy + ploidy = ploidy, + global_errors = global_errors, + genoprob_error = genoprob_error, + prob_thres = prob_thres, + genoprob_global_errors = genoprob_global_errors } } 
@@ -151,8 +167,10 @@ workflow Maps { multiallelics = dataset.multiallelics, max_cores = max_cores, multiallelics_file = splitgeno.multiallelics, - multiallelics_mchap = gatk_vcf_multi, - mchap = gatk_mchap + global_errors = global_errors, + genoprob_error = genoprob_error, + prob_thres = prob_thres, + genoprob_global_errors = genoprob_global_errors } } } diff --git a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md index f85a429..efedf23 100644 --- a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md +++ b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md @@ -1,3 +1,16 @@ +# 1.4.2 + +* more flexibility to choose the probability to be used in the HMM: + +* new parameters: +- global_errors: array with global errors to be tested +- genoprob_error: boolean defining if software probabilities should be tested +- genoprob_global_errors: array with global errors to be tested together with the software probabilities following: 1 - (1 - global error) x (1 - software error probability) + +# 1.4.1 + +* Use BCFtools norm to left-align indel marker positions identified by GATK, STACKs and freebayes + # 1.4.0 * STACKs included diff --git a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.inputs.json b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.inputs.json index 9945e78..e98be9c 100644 --- a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.inputs.json +++ b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.inputs.json @@ -10,16 +10,10 @@ "EmpiricalReads.chunk_size": "Int", "EmpiricalReads.Maps.run_gusmap": "Boolean (optional, default = false)", "EmpiricalReads.gatk_mchap": "Boolean (optional, default = false)", + "EmpiricalReads.Maps.global_errors": "Array[String] (optional, default = [\"0.05\"])", "EmpiricalReads.rm_dupli": "Boolean (optional, default = false)", "EmpiricalReads.SNPCalling.max_ram": "Int", - "EmpiricalReads.dataset": { - "parent2": "String", - "name": "String", - 
"parent1": "String", - "chromosome": "String", - "cross": "String", - "multiallelics": "Boolean" - }, + "EmpiricalReads.Maps.genoprob_error": "Boolean (optional, default = true)", "EmpiricalReads.ploidy": "Int (optional, default = 2)", "EmpiricalReads.n_chrom": "Int", "EmpiricalReads.SNPCalling.chunk_size": "Int", @@ -43,6 +37,15 @@ "EmpiricalReads.Maps.filter_noninfo": "Boolean", "EmpiricalReads.SNPCalling.pop_map": "File? (optional)", "EmpiricalReads.samples_info": "File", + "EmpiricalReads.dataset": { + "parent2": "String", + "name": "String", + "parent1": "String", + "chromosome": "String", + "cross": "String", + "multiallelics": "Boolean" + }, + "EmpiricalReads.Maps.genoprob_global_errors": "Array[String] (optional, default = [\"0.05\"])", "EmpiricalReads.pair_end": "Boolean (optional, default = false)", "EmpiricalReads.run_gatk": "Boolean (optional, default = true)" } diff --git a/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.changelog.md b/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.changelog.md index f51a21c..051d0ee 100644 --- a/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.changelog.md +++ b/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.changelog.md @@ -1,3 +1,7 @@ +# 1.4.1 + +* Use BCFtools norm to left-align indel marker positions identified by GATK, STACKs and freebayes + # 1.4.0 * STACKs included @@ -47,4 +51,4 @@ This workflow requires: * Diploid or polyploid specie * Single-end reads -* A reference genome \ No newline at end of file +* A reference genome diff --git a/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.wdl b/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.wdl index a501579..fb514f0 100644 --- a/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.wdl +++ b/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.wdl @@ -92,6 +92,7 @@ workflow SNPCalling { if(run_stacks) { call stacks.StacksGenotyping { input: + references = references, bams = CreateAlignmentFromFamilies.bam, pop_map = pop_map, max_cores = max_cores diff --git 
a/pipelines/SimulatedReads2Map/SimulatedReads2Map.changelog.md b/pipelines/SimulatedReads2Map/SimulatedReads2Map.changelog.md index 08373c0..f3e060d 100644 --- a/pipelines/SimulatedReads2Map/SimulatedReads2Map.changelog.md +++ b/pipelines/SimulatedReads2Map/SimulatedReads2Map.changelog.md @@ -1,3 +1,12 @@ +# 1.0.2 + +* more flexibility to choose the probability to be used in the HMM: + +* new parameters: +- global_errors: array with global errors to be tested +- genoprob_error: boolean defining if software probabilities should be tested +- genoprob_global_errors: array with global errors to be tested together with the software probabilities following: 1 - (1 - global error) x (1 - software error probability) + # 1.0.1 * runtimes adapted to run with Caper diff --git a/pipelines/SimulatedReads2Map/SimulatedReads2Map.inputs.json b/pipelines/SimulatedReads2Map/SimulatedReads2Map.inputs.json index ac689dc..8622fa9 100644 --- a/pipelines/SimulatedReads2Map/SimulatedReads2Map.inputs.json +++ b/pipelines/SimulatedReads2Map/SimulatedReads2Map.inputs.json @@ -1,5 +1,7 @@ { + "SimulatedReads.SimulatedSingleFamily.global_errors": "Array[String]", "SimulatedReads.filters": "String? (optional)", + "SimulatedReads.SimulatedSingleFamily.genoprob_global_errors": "Array[String]", "SimulatedReads.hardfilters": "Boolean (optional, default = true)", "SimulatedReads.family": { "cmBymb": "Float? 
(optional)", @@ -22,6 +24,8 @@ "SimulatedReads.n_chrom": "Int", "SimulatedReads.chunk_size": "Int (optional, default = 5)", "SimulatedReads.max_cores": "Int", + "SimulatedReads.SimulatedSingleFamily.genoprob_error": "String", + "SimulatedReads.SimulatedSingleFamily.prob_thres": "Float", "SimulatedReads.number_of_families": "Int", "SimulatedReads.gatk_mchap": "Boolean (optional, default = false)", "SimulatedReads.global_seed": "Int", diff --git a/subworkflows/SimulatedSingleFamily.wdl b/subworkflows/SimulatedSingleFamily.wdl index 80718f4..709dbdb 100644 --- a/subworkflows/SimulatedSingleFamily.wdl +++ b/subworkflows/SimulatedSingleFamily.wdl @@ -27,6 +27,10 @@ workflow SimulatedSingleFamily { Boolean gatk_mchap Boolean hardfilters Int n_chrom + Float prob_thres + Array[String] global_errors + String genoprob_error + Array[String] genoprob_global_errors } call simulation.CreateAlignmentFromSimulation { @@ -148,7 +152,11 @@ workflow SimulatedSingleFamily { depth = sequencing.depth, multiallelics = sequencing.multiallelics, multiallelics_file = splitgeno.multiallelics, - ploidy = ploidy + ploidy = ploidy, + global_errors = global_errors, + genoprob_error = genoprob_error, + prob_thres = prob_thres, + genoprob_global_errors = genoprob_global_errors } call genotyping.onemapMaps as supermassaMaps { @@ -168,7 +176,11 @@ workflow SimulatedSingleFamily { depth = sequencing.depth, multiallelics = sequencing.multiallelics, multiallelics_file = splitgeno.multiallelics, - ploidy = ploidy + ploidy = ploidy, + global_errors = global_errors, + genoprob_error = genoprob_error, + prob_thres = prob_thres, + genoprob_global_errors = genoprob_global_errors } call genotyping.onemapMaps as polyradMaps { @@ -188,7 +200,11 @@ workflow SimulatedSingleFamily { depth = sequencing.depth, multiallelics = sequencing.multiallelics, multiallelics_file = splitgeno.multiallelics, - ploidy = ploidy + ploidy = ploidy, + global_errors = global_errors, + genoprob_error = genoprob_error, + prob_thres = 
prob_thres, + genoprob_global_errors = genoprob_global_errors } } diff --git a/subworkflows/genotyping_empirical.wdl b/subworkflows/genotyping_empirical.wdl index f08ddf6..7ab8f9d 100644 --- a/subworkflows/genotyping_empirical.wdl +++ b/subworkflows/genotyping_empirical.wdl @@ -18,6 +18,10 @@ workflow onemapMapsEmp { Boolean multiallelics File? multiallelics_file Int ploidy + Float prob_thres + Array[String] global_errors + Boolean genoprob_error + Array[String] genoprob_global_errors } call utilsR.ReGenotyping { @@ -74,31 +78,32 @@ workflow onemapMapsEmp { parent1 = parent1, parent2 = parent2, multiallelics = multiallelics, - SNPCall_program = SNPCall_program + SNPCall_program = SNPCall_program, + global_errors = global_errors, + genoprob_error = genoprob_error, + prob_thres = prob_thres, + genoprob_global_errors = genoprob_global_errors, + GenotypeCall_program = GenotypeCall_program } - Array[String] methods = [GenotypeCall_program, GenotypeCall_program + "0.05"] - Array[File] objects = [SetProbs.probs_onemap_obj, SetProbs.globalerror_onemap_obj] - Array[Pair[String, File]] methods_and_objects = zip(methods, objects) - - scatter (item in methods_and_objects) { + scatter (item in range(length(SetProbs.probs_onemap_obj))) { call utilsR.CheckDepths { input: - onemap_obj = item.right, + onemap_obj = SetProbs.probs_onemap_obj[item], vcfR_obj = SetProbs.vcfR_obj, parent1 = parent1, parent2 = parent2, SNPCall_program = SNPCall_program, - GenotypeCall_program = item.left, + GenotypeCall_program = SetProbs.probs_onemap_obj_names[item], CountsFrom = CountsFrom, max_cores = max_cores } call utilsR.FiltersReportEmp { input: - onemap_obj = item.right, + onemap_obj = SetProbs.probs_onemap_obj[item], SNPCall_program = SNPCall_program, - GenotypeCall_program = item.left, + GenotypeCall_program = SetProbs.probs_onemap_obj_names[item], CountsFrom = CountsFrom, chromosome = chromosome } @@ -107,7 +112,7 @@ workflow onemapMapsEmp { input: sequence_obj = 
FiltersReportEmp.onemap_obj_filtered, SNPCall_program = SNPCall_program, - GenotypeCall_program = item.left, + GenotypeCall_program = SetProbs.probs_onemap_obj_names[item], CountsFrom = CountsFrom, max_cores = max_cores } diff --git a/subworkflows/genotyping_simulated.wdl b/subworkflows/genotyping_simulated.wdl index d67d504..89f3b4d 100644 --- a/subworkflows/genotyping_simulated.wdl +++ b/subworkflows/genotyping_simulated.wdl @@ -21,6 +21,10 @@ workflow onemapMaps { String multiallelics File? multiallelics_file Int ploidy + Float prob_thres + Array[String] global_errors + String genoprob_error + Array[String] genoprob_global_errors } @@ -57,20 +61,21 @@ workflow onemapMaps { parent1 = "P1", parent2 = "P2", multiallelics = multiallelics, - SNPCall_program = SNPCall_program + SNPCall_program = SNPCall_program, + global_errors = global_errors, + genoprob_error = genoprob_error, + prob_thres = prob_thres, + genoprob_global_errors = genoprob_global_errors, + GenotypeCall_program = genotyping_program } - Array[String] methods = [genotyping_program, genotyping_program + "0.05"] - Array[File] objects = [SetProbs.probs_onemap_obj, SetProbs.globalerror_onemap_obj] - Array[Pair[String, File]] methods_and_objects = zip(methods, objects) - - scatter (item in methods_and_objects) { + scatter (item in range(length(SetProbs.probs_onemap_obj))) { call utilsR.FiltersReport { input: - onemap_obj = item.right, + onemap_obj = SetProbs.probs_onemap_obj[item], SNPCall_program = SNPCall_program, - GenotypeCall_program = item.left, + GenotypeCall_program = SetProbs.probs_onemap_obj_names[item], CountsFrom = CountsFrom, seed = seed, depth = depth @@ -78,30 +83,30 @@ workflow onemapMaps { call utilsR.MapsReport { input: - onemap_obj = FiltersReport.onemap_obj_filtered, + onemap_obj = SetProbs.probs_onemap_obj[item], ref_alt_alleles = ref_alt_alleles, simu_onemap_obj = simu_onemap_obj, SNPCall_program = SNPCall_program, - GenotypeCall_program = item.left, + GenotypeCall_program = 
SetProbs.probs_onemap_obj_names[item], CountsFrom = CountsFrom, simulated_phases = simulated_phases, seed = seed, depth = depth, max_cores = max_cores - } + } call utilsR.ErrorsReport { input: - onemap_obj = item.right, + onemap_obj = SetProbs.probs_onemap_obj[item], SNPCall_program = SNPCall_program, - GenotypeCall_program = item.left, + GenotypeCall_program = SetProbs.probs_onemap_obj_names[item], CountsFrom = CountsFrom, simu_vcfR = simu_vcfR, vcfR_obj = SetProbs.vcfR_obj, seed = seed, depth = depth, max_cores = max_cores - } + } } diff --git a/subworkflows/norm_filt_vcf.wdl b/subworkflows/norm_filt_vcf.wdl index 17347fe..2b9e01f 100644 --- a/subworkflows/norm_filt_vcf.wdl +++ b/subworkflows/norm_filt_vcf.wdl @@ -19,9 +19,7 @@ workflow Normalization { input: vcf_file = vcf_in, reference = reference, - reference_idx = reference_idx, - ploidy = ploidy, - software = program + reference_idx = reference_idx } call gatk.VariantEval { diff --git a/subworkflows/snpcaller_maps_empirical.wdl b/subworkflows/snpcaller_maps_empirical.wdl index ddc5f68..e38e48b 100644 --- a/subworkflows/snpcaller_maps_empirical.wdl +++ b/subworkflows/snpcaller_maps_empirical.wdl @@ -15,9 +15,11 @@ workflow SNPCallerMapsEmp { String chromosome String multiallelics File? multiallelics_file - File? 
multiallelics_mchap - String mchap Int max_cores + Float prob_thres + Array[String] global_errors + Boolean genoprob_error + Array[String] genoprob_global_errors } @@ -34,40 +36,39 @@ workflow SNPCallerMapsEmp { File updated_vcf = select_first([JointMarkers.merged_vcf, vcf_file]) - call utilsR.SetProbsDefault { + call utilsR.SetProbs { input: vcf_file = updated_vcf, cross = cross, parent1 = parent1, parent2 = parent2, - SNPCall_program = SNPCall_program, multiallelics = multiallelics, - multiallelics_mchap = multiallelics_mchap, - mchap = mchap + SNPCall_program = SNPCall_program, + global_errors = global_errors, + genoprob_error = genoprob_error, + prob_thres = prob_thres, + genoprob_global_errors = genoprob_global_errors, + GenotypeCall_program = GenotypeCall_program } - Array[String] methods = [GenotypeCall_program, GenotypeCall_program + "0.05", GenotypeCall_program + "default"] - Array[File] objects = [SetProbsDefault.probs_onemap_obj, SetProbsDefault.globalerror_onemap_obj, SetProbsDefault.default_onemap_obj] - Array[Pair[String, File]] methods_and_objects = zip(methods, objects) - - scatter (item in methods_and_objects) { + scatter (item in range(length(SetProbs.probs_onemap_obj))) { call utilsR.CheckDepths { input: - onemap_obj = item.right, - vcfR_obj = SetProbsDefault.vcfR_obj, + onemap_obj = SetProbs.probs_onemap_obj[item], + vcfR_obj = SetProbs.vcfR_obj, parent1 = parent1, parent2 = parent2, SNPCall_program = SNPCall_program, - GenotypeCall_program = item.left, + GenotypeCall_program = SetProbs.probs_onemap_obj_names[item], CountsFrom = CountsFrom, max_cores = max_cores } call utilsR.FiltersReportEmp { input: - onemap_obj = item.right, + onemap_obj = SetProbs.probs_onemap_obj[item], SNPCall_program = SNPCall_program, - GenotypeCall_program = item.left, + GenotypeCall_program = SetProbs.probs_onemap_obj_names[item], CountsFrom = CountsFrom, chromosome = chromosome } @@ -76,7 +77,7 @@ workflow SNPCallerMapsEmp { input: sequence_obj = 
FiltersReportEmp.onemap_obj_filtered, SNPCall_program = SNPCall_program, - GenotypeCall_program = item.left, + GenotypeCall_program = SetProbs.probs_onemap_obj_names[item], CountsFrom = CountsFrom, max_cores = max_cores } diff --git a/subworkflows/stacks_genotyping.wdl b/subworkflows/stacks_genotyping.wdl index baaf7c1..a4d47cd 100644 --- a/subworkflows/stacks_genotyping.wdl +++ b/subworkflows/stacks_genotyping.wdl @@ -1,10 +1,13 @@ version 1.0 +import "../structs/dna_seq_structs.wdl" import "../tasks/stacks.wdl" +import "../subworkflows/norm_filt_vcf.wdl" as norm_filt workflow StacksGenotyping { input { + ReferenceFasta references Array[File] bams File? pop_map Int max_cores @@ -26,8 +29,19 @@ workflow StacksGenotyping { max_cores = max_cores } + call norm_filt.Normalization { + input: + vcf_in= RefMap.stacks_vcf, + reference = references.ref_fasta, + reference_idx = references.ref_fasta_index, + reference_dict = references.ref_dict, + program = "stacks", + counts_source = "vcf", + ploidy = "2" + } + output { - Array[File] vcfs = [RefMap.stacks_vcf] + Array[File] vcfs = [Normalization.vcf_norm] File stacks_multiallelics = RefMap.stacks_multiallelics Array[String] software_sele = ["stacks"] Array[String] source_sele = ["vcf"] diff --git a/subworkflows/tassel_genotyping.wdl b/subworkflows/tassel_genotyping.wdl index 1e0d5ff..5101652 100644 --- a/subworkflows/tassel_genotyping.wdl +++ b/subworkflows/tassel_genotyping.wdl @@ -3,8 +3,10 @@ version 1.0 import "../structs/dna_seq_structs.wdl" import "../tasks/tassel.wdl" import "../tasks/BWA.wdl" +import "../tasks/bcftools.wdl" import "../tasks/utils.wdl" as utils +import "../subworkflows/norm_filt_vcf.wdl" as norm_filt workflow TasselGenotyping { input { @@ -25,7 +27,7 @@ workflow TasselGenotyping { call tassel.BarcodeFaker { input: fastq = sample_file[0], - FullSampleName = sample_file[2] + FullSampleName = sample_file[1] # Tassel does not consider replicates variances, we simple joint the reads } call 
tassel.TasselBeforeAlign { @@ -57,8 +59,31 @@ workflow TasselGenotyping { fastq = BarcodeFaker.barcode_fastq } + # I tried to include the normalization sub-workflow here, but + # TASSEL VCF does not has the standard header format, then + # BCFtools does not work + # I included a new tast to fix the VCF before normalization + + call bcftools.FixTasselVCF{ + input: + vcf_file = TasselAfterAlign.tassel_vcf, + reference = references.ref_fasta, + reference_idx = references.ref_fasta_index + } + + call norm_filt.Normalization { + input: + vcf_in= FixTasselVCF.vcf_fixed, + reference = references.ref_fasta, + reference_idx = references.ref_fasta_index, + reference_dict = references.ref_dict, + program = "tassel", + counts_source = "vcf", + ploidy = "2" + } + output { - Array[File] vcfs = [TasselAfterAlign.tassel_vcf] + Array[File] vcfs = [Normalization.vcf_norm] Array[String] software_sele = ["tassel"] Array[String] source_sele = ["vcf"] } diff --git a/tasks/JointReports.wdl b/tasks/JointReports.wdl index ae76562..0bfa0b7 100644 --- a/tasks/JointReports.wdl +++ b/tasks/JointReports.wdl @@ -24,7 +24,6 @@ task JointAllReports{ library(tidyr) library(stringr) library(vroom) - library(largeList) if(~{ploidy} > 2){ @@ -89,32 +88,10 @@ task JointAllReports{ files <- paste0(path_dir, "/times/", files) times <- vroom(files, num_threads = ~{max_cores}) - files <- system(paste0("ls ", path_dir, "/RDatas/"), intern = T) - files <- paste0(path_dir, "/RDatas/", files) - - all_RDatas <- list() - for(i in 1:length(files)){ - map_temp <- load(files[i]) - all_RDatas[[i]] <- get(map_temp) - } - - names(all_RDatas) <- basename(files) - - if(any(grepl("gusmap", names(all_RDatas)))){ - gusmap_RDatas <- all_RDatas[grep("gusmap", names(all_RDatas))] - save(gusmap_RDatas, file = "gusmap_RDatas.RData") - RDatas <- all_RDatas[-grep("gusmap", names(all_RDatas))] - } else RDatas <- all_RDatas - - # # Converting onemap sequencig objects to list. 
LargeList do not accept other class - # # Also because of this gusmap is separated, because the developers worked with enviroments, not classes - for(i in 1:length(RDatas)){ - class(RDatas[[i]]) <- "list" - } - - saveList(RDatas, file = "sequences_emp.llo", append=FALSE, compress=TRUE) - - new_names <- names(all_RDatas) + system("mkdir sequences_emp") + new_names <- system(paste0("ls ", path_dir, "/RDatas/"), intern = T) + files <- system(paste0("cp ", path_dir, "/RDatas/* sequences_emp")) + vroom_write(as.data.frame(new_names), "names.tsv.gz") # Outputs @@ -125,9 +102,7 @@ task JointAllReports{ system("mkdir EmpiricalReads_results") - if(any(grepl("gusmap", names(all_RDatas)))){ - system("mv gusmap_RDatas.RData sequences_emp.llo data1_depths_geno_prob.tsv.gz data2_maps.tsv.gz data3_filters.tsv.gz data4_times.tsv.gz names.tsv.gz EmpiricalReads_results") - } else system("mv sequences_emp.llo data1_depths_geno_prob.tsv.gz data2_maps.tsv.gz data3_filters.tsv.gz data4_times.tsv.gz names.tsv.gz EmpiricalReads_results") + system("mv sequences_emp data1_depths_geno_prob.tsv.gz data2_maps.tsv.gz data3_filters.tsv.gz data4_times.tsv.gz names.tsv.gz EmpiricalReads_results") system("tar -czvf EmpiricalReads_results.tar.gz EmpiricalReads_results") diff --git a/tasks/bcftools.wdl b/tasks/bcftools.wdl index b0f2214..350cea9 100644 --- a/tasks/bcftools.wdl +++ b/tasks/bcftools.wdl @@ -8,8 +8,6 @@ task BiallelicNormalization { File vcf_file File reference File reference_idx - Int ploidy - String software } Int disk_size = ceil(size(vcf_file, "GiB") + size(reference, "GiB") + 2) @@ -17,12 +15,7 @@ task BiallelicNormalization { command <<< - #if [ ~{ploidy} -gt 2 ] && [ "~{software}" == "freebayes" ] # GATK returns an error when trying to split by row saying that PL has wrong number of fields - #then - # bcftools norm ~{vcf_file} -m - -Ov --check-ref w -f ~{reference} > vcf_norm.vcf - #else - bcftools norm ~{vcf_file} --rm-dup all -Ov --check-ref w -f ~{reference} > vcf_norm.vcf - 
#fi
+	bcftools norm ~{vcf_file} --rm-dup all -Ov --check-ref w -f ~{reference} > vcf_norm.vcf

     bgzip vcf_norm.vcf
     tabix -p vcf vcf_norm.vcf.gz
@@ -52,3 +45,58 @@
     File vcf_norm_tbi = "vcf_norm.vcf.gz.tbi"
   }
 }
+
+
+task FixTasselVCF {
+  input {
+    File vcf_file
+    File reference
+    File reference_idx
+  }
+
+  Int disk_size = ceil(size(vcf_file, "GiB") + size(reference, "GiB") + 2)
+  Int memory_size = 4000 + ceil(size(vcf_file, "MiB") + size(reference, "MiB") + 2)
+
+  command <<<
+
+    # Fix header Number fields that TASSEL writes out of spec.
+    # NOTE(review): the original 'sed ... file > file' chain truncated its own
+    # input (the shell opens the redirect target before sed reads it) — use -i
+    # for the follow-up edits instead. The malformed "sed '/INFO='" command
+    # (unterminated address regex) was removed; it could never run.
+    sed 's/PL,Number=./PL,Number=G/g' ~{vcf_file} > tassel_fix.vcf
+    sed -i 's/AD,Number=./AD,Number=R/g' tassel_fix.vcf
+    sed -i 's/AF,Number=./AF,Number=A/g' tassel_fix.vcf
+
+    # Build a chromosome rename map (TASSEL name -> reference name).
+    grep "^>" ~{reference} > chrs
+    sed -i 's/>//' chrs
+    cp chrs chrs_tassel
+    sed -i 's/Chr//' chrs_tassel
+    sed -i 's/scaffold/SCAFFOLD/' chrs_tassel
+    paste -d'\t' chrs_tassel chrs > fix_chrom
+
+    bgzip tassel_fix.vcf
+    tabix -p vcf tassel_fix.vcf.gz
+    # -Oz keeps the output bgzip-compressed; the original '>' redirect wrote
+    # plain text into a .gz-named file, breaking downstream tabix/bcftools.
+    bcftools annotate --rename-chrs fix_chrom tassel_fix.vcf.gz -Oz -o tassel_fix_chr.vcf.gz
+
+  >>>
+
+  runtime {
+    docker: "lifebitai/bcftools:1.10.2"
+    singularity:"docker://lifebitai/bcftools:1.10.2"
+    cpu: 1
+    # Cloud
+    memory:"~{memory_size} MiB"
+    disks:"local-disk " + disk_size + " HDD"
+    # Slurm
+    job_name: "FixTasselVCF"
+    mem:"~{memory_size}M"
+    time: 1
+  }
+
+  meta {
+    author: "Cristiane Taniguti"
+    email: "chtaniguti@tamu.edu"
+    description: "Fixes TASSEL VCF header Number fields and renames chromosomes to match the reference FASTA so that [bcftools](https://samtools.github.io/bcftools/bcftools.html) can process the file."
+  }
+
+  output {
+    File vcf_fixed = "tassel_fix_chr.vcf.gz"
+  }
+}
\ No newline at end of file
diff --git a/tasks/tassel.wdl b/tasks/tassel.wdl
index daf78c3..bf8b28a 100644
--- a/tasks/tassel.wdl
+++ b/tasks/tassel.wdl
@@ -213,18 +213,19 @@ task TasselBeforeAlign {
 
   Int disk_size = ceil(size(fastq, "GiB"))
   Int memory_min = ceil(max_ram/2)
+  Int memory_max = max_ram - 5000
 
   command <<<
     mkdir fastqs
     ln -s ~{sep=" " fastq} fastqs
 
-    /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{max_ram}m -fork1 -GBSSeqToTagDBPlugin -e ~{enzyme} -i fastqs \
+    /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{memory_max}m -fork1 -GBSSeqToTagDBPlugin -e ~{enzyme} -i fastqs \
         -db GBSV2.db \
         -k ~{key_file} \
         -kmerLength 64 -minKmerL 20 -mnQS 20 -mxKmerNum 100000000 -endPlugin -runfork1
 
-    /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{max_ram}m -fork1 -TagExportToFastqPlugin -db GBSV2.db \
+    /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{memory_max}m -fork1 -TagExportToFastqPlugin -db GBSV2.db \
         -o tagsForAlign.fa.gz -c 1 -endPlugin -runfork1
 
   >>>
@@ -266,29 +267,30 @@ task TasselAfterAlign {
 
   Int disk_size = ceil(size(tassel_database, "GiB"))
  Int memory_min = ceil(max_ram/2)
+  Int memory_max = max_ram - 5000
 
   command <<<
     samtools view -h ~{bam} > file.sam
     mv ~{tassel_database} .
- /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{max_ram}m -fork1 -SAMToGBSdbPlugin \ + /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{memory_max}m -fork1 -SAMToGBSdbPlugin \ -i file.sam \ -db GBSV2.db \ -aProp 0.0 -aLen 0 -endPlugin -runfork1 - /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{max_ram}m -fork1 -DiscoverySNPCallerPluginV2 \ + /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{memory_max}m -fork1 -DiscoverySNPCallerPluginV2 \ -db GBSV2.db \ -mnLCov 0.1 -mnMAF 0.01 -deleteOldData true -endPlugin -runfork1 - /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{max_ram}m -fork1 -SNPQualityProfilerPlugin \ + /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{memory_max}m -fork1 -SNPQualityProfilerPlugin \ -db GBSV2.db \ -deleteOldData true -endPlugin -runfork1 mkdir fastqs ln -s ~{sep=" " fastq} fastqs - /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{max_ram}m -fork1 -ProductionSNPCallerPluginV2 \ + /usr/tassel/run_pipeline.pl -Xms~{memory_min}m -Xmx~{memory_max}m -fork1 -ProductionSNPCallerPluginV2 \ -db GBSV2.db \ -e ~{enzyme} -i fastqs \ -k ~{key_file} \ diff --git a/tasks/utils.wdl b/tasks/utils.wdl index e98428a..c5e97eb 100644 --- a/tasks/utils.wdl +++ b/tasks/utils.wdl @@ -161,7 +161,7 @@ task ApplyRandomFiltersArray { Array[String] vcfs_SNPCall_software Array[String] vcfs_Counts_source Array[String] vcfs_GenoCall_software - String? filters + String? filters String? 
chromosome } @@ -184,7 +184,7 @@ task ApplyRandomFiltersArray { fi tabix -p vcf temp.vcf.gz - bcftools view temp.vcf.gz ~{filters} ~{" -r " + chromosome} \ + bcftools view temp.vcf.gz ~{filters} \ -o vcf_filt_${vcfs_snp_software[$index]}_${vcfs_counts_source[$index]}_${vcfs_geno_software[$index]}.vcf bgzip vcf_filt_${vcfs_snp_software[$index]}_${vcfs_counts_source[$index]}_${vcfs_geno_software[$index]}.vcf rm temp.vcf.gz temp.vcf.gz.tbi diff --git a/tasks/utilsR.wdl b/tasks/utilsR.wdl index a03061a..c5503c6 100644 --- a/tasks/utilsR.wdl +++ b/tasks/utilsR.wdl @@ -44,8 +44,8 @@ task vcf2onemap { >>> runtime { - docker:"cristaniguti/reads2map:0.0.4" - singularity: "docker://cristaniguti/reads2map:0.0.4" + docker:"cristaniguti/reads2map:0.0.8" + singularity: "docker://cristaniguti/reads2map:0.0.8" cpu:1 # Cloud memory:"~{memory_size} MiB" @@ -99,8 +99,8 @@ task FiltersReport { >>> runtime { - docker: "cristaniguti/reads2map:0.0.4" - singularity:"docker://cristaniguti/reads2map:0.0.4" + docker: "cristaniguti/reads2map:0.0.8" + singularity:"docker://cristaniguti/reads2map:0.0.8" cpu:1 # Cloud memory:"~{memory_size} MiB" @@ -133,7 +133,7 @@ task FiltersReportEmp { } Int disk_size = ceil(size(onemap_obj, "GiB") * 2) - Int memory_size = ceil(size(onemap_obj, "MiB") * 2 + 3000) + Int memory_size = ceil(size(onemap_obj, "MiB") * 5 + 3000) command <<< R --vanilla --no-save <>> runtime { - docker: "cristaniguti/reads2map:0.0.4" - singularity:"docker://cristaniguti/reads2map:0.0.4" + docker: "cristaniguti/reads2map:0.0.8" + singularity:"docker://cristaniguti/reads2map:0.0.8" cpu:1 # Cloud memory:"~{memory_size} MiB" @@ -259,8 +259,8 @@ task MapsReport { >>> runtime { - docker: "cristaniguti/reads2map:0.0.4" - singularity:"docker://cristaniguti/reads2map:0.0.4" + docker: "cristaniguti/reads2map:0.0.8" + singularity:"docker://cristaniguti/reads2map:0.0.8" cpu:4 # Cloud memory:"~{memory_size} MiB" @@ -349,8 +349,8 @@ task ErrorsReport { >>> runtime { - docker: 
"cristaniguti/reads2map:0.0.4" - singularity:"docker://cristaniguti/reads2map:0.0.4" + docker: "cristaniguti/reads2map:0.0.8" + singularity:"docker://cristaniguti/reads2map:0.0.8" cpu: max_cores # Cloud memory:"~{memory_size} MiB" @@ -414,8 +414,8 @@ task CheckDepths { >>> runtime { - docker:"cristaniguti/reads2map:0.0.4" - singularity:"docker://cristaniguti/reads2map:0.0.4" + docker:"cristaniguti/reads2map:0.0.8" + singularity:"docker://cristaniguti/reads2map:0.0.8" cpu: max_cores # Cloud memory:"~{memory_size} MiB" @@ -448,7 +448,7 @@ task MapsReportEmp { } Int disk_size = ceil((size(sequence_obj, "GiB") * 2)) - Int memory_size = ceil(size(sequence_obj, "MiB") * 3 + 4000) + Int memory_size = ceil(size(sequence_obj, "MiB") * 12 + 4000) command <<< R --vanilla --no-save <>> runtime { - docker:"cristaniguti/reads2map:0.0.4" - singularity:"docker://cristaniguti/reads2map:0.0.4" - cpu:max_cores + docker:"cristaniguti/reads2map:0.0.8" + singularity:"docker://cristaniguti/reads2map:0.0.8" + cpu: if max_cores > 4 then 4 else max_cores # Cloud memory:"~{memory_size} MiB" disks:"local-disk " + disk_size + " HDD" @@ -519,7 +519,7 @@ task ReGenotyping { } Int disk_size = ceil((size(vcf_file, "GiB") * 4)) - Int memory_size = ceil(size(vcf_file, "MiB") * 3 + 4000) + Int memory_size = ceil(size(vcf_file, "MiB") * 5 + 4000) command <<< R --vanilla --no-save <>> runtime { - docker:"cristaniguti/reads2map:0.0.5" - singularity:"docker://cristaniguti/reads2map:0.0.5" + docker:"cristaniguti/reads2map:0.0.8" + singularity:"docker://cristaniguti/reads2map:0.0.8" cpu: max_cores # Cloud memory:"~{memory_size} MiB" @@ -607,6 +607,11 @@ task SetProbs { String parent2 String multiallelics String SNPCall_program + String GenotypeCall_program + Float prob_thres + Array[String] global_errors + Boolean genoprob_error + Array[String] genoprob_global_errors } Int disk_size = ceil(size(vcf_file, "GiB") * 3) @@ -618,6 +623,11 @@ task SetProbs { library(vcfR) cross <- "~{cross}" + global_errors <- 
unlist(strsplit("~{sep="," global_errors}", ",")) + genoprob_error <- "~{genoprob_error}" + genoprob_global_errors <- unlist(strsplit("~{sep="," genoprob_global_errors}", ",")) + probs_onemap_obj <- list() + idx <- 1 if(cross == "F1"){ cross <- "outcross" @@ -638,33 +648,74 @@ task SetProbs { f1 = f1, only_biallelic = only_biallelic) # if("~{SNPCall_program}" == "freebayes") par <- "GL" else par <- "PL" + + if(any(genoprob_global_errors != "false") | genoprob_error != "false") { + probs <- extract_depth(vcfR.object=vcf, + onemap.object=onemap.obj, + vcf.par= "GQ", + parent1="~{parent1}", + parent2="~{parent2}", + f1 = f1, + recovering=FALSE) + + if(any(genoprob_error != "false")){ + temp <- create_probs(input.obj = onemap.obj, genotypes_errors=probs) + + if("~{prob_thres}" != "0"){ + onemap_prob <- filter_prob(temp, threshold = as.numeric("~{prob_thres}")) + probs_onemap_obj[[idx]] <- filter_missing(onemap_prob, threshold = 0.25) + } else { + probs_onemap_obj[[idx]] <- temp + } + names(probs_onemap_obj)[[idx]] <- "genoprob_error" + idx <- idx + 1 + } - probs <- extract_depth(vcfR.object=vcf, - onemap.object=onemap.obj, - vcf.par= "GQ", - parent1="~{parent1}", - parent2="~{parent2}", - f1 = f1, - recovering=FALSE) + if(any(genoprob_global_errors != "false")){ + for(i in genoprob_global_errors){ + temp <- create_probs(input.obj = onemap.obj, genotypes_errors= 1- (1-probs)*(1 - as.numeric(i))) + + if("~{prob_thres}" != "0"){ + onemap_prob <- filter_prob(temp, threshold = as.numeric("~{prob_thres}")) + probs_onemap_obj[[idx]] <- filter_missing(onemap_prob, threshold = 0.25) + } else { + probs_onemap_obj[[idx]] <- temp + } + + names(probs_onemap_obj)[[idx]] <- paste0("genoprob_global_error", i) + idx <- idx + 1 + } + } + } - probs_onemap_obj <- create_probs(input.obj = onemap.obj, genotypes_errors=probs) + if(any(global_errors != "false")){ + for(i in global_errors){ + probs_onemap_obj[[idx]] <- create_probs(input.obj = probs_onemap_obj[[1]], global_error = 
as.numeric(i)) + names(probs_onemap_obj)[[idx]] <- paste0("global_error", i) + idx <- idx + 1 + } + } - # If filter by genotype probability - # onemap_prob <- filter_prob(probs_onemap_obj, threshold = threshold) - # onemap_mis <- filter_missing(onemap_prob, threshold = 0.25) - # globalerror_onemap_obj <- create_probs(input.obj = onemap_mis, global_error = 0.05) + for(i in 1:length(probs_onemap_obj)){ + probs_onemap <- probs_onemap_obj[[i]] + save(probs_onemap, file= paste0("probs_onemap_", + names(probs_onemap_obj)[i], ".RData")) + } - globalerror_onemap_obj <- create_probs(input.obj = onemap.obj, global_error = 0.05) + cat(names(probs_onemap_obj)) - save(probs_onemap_obj, file="probs_onemap_obj.RData") - save(globalerror_onemap_obj, file="globalerror_onemap_obj.RData") + write.table(paste0("~{GenotypeCall_program}_", + names(probs_onemap_obj)), + file = "names.txt", row.names = FALSE, + col.names = FALSE, quote = FALSE) RSCRIPT >>> + runtime { - docker:"cristaniguti/reads2map:0.0.4" - singularity:"docker://cristaniguti/reads2map:0.0.4" + docker:"cristaniguti/reads2map:0.0.8" + singularity:"docker://cristaniguti/reads2map:0.0.8" cpu:1 # Cloud memory:"~{memory_size} MiB" @@ -682,8 +733,8 @@ task SetProbs { } output { - File probs_onemap_obj = "probs_onemap_obj.RData" - File globalerror_onemap_obj = "globalerror_onemap_obj.RData" + Array[File] probs_onemap_obj = glob("probs_onemap_*.RData") + Array[String] probs_onemap_obj_names = read_lines("names.txt") File vcfR_obj = "vcfR.RData" } } @@ -755,8 +806,8 @@ task SetProbsDefault { >>> runtime { - docker:"cristaniguti/reads2map:0.0.4" - singularity:"docker://cristaniguti/reads2map:0.0.4" + docker:"cristaniguti/reads2map:0.0.8" + singularity:"docker://cristaniguti/reads2map:0.0.8" cpu:1 # Cloud memory:"~{memory_size} MiB" @@ -786,7 +837,7 @@ task RemoveNonInformative { File vcf_file String parent1 String parent2 - String replaceADbyMissing + Boolean replaceADbyMissing } Int disk_size = ceil(size(vcf_file, "GiB") * 2) @@ 
-800,15 +851,15 @@ task RemoveNonInformative { remove_non_informative("~{vcf_file}", P1 = "~{parent1}", P2 = "~{parent2}", - replaceAD = "~{replaceADbyMissing}", + replaceAD = "~{replaceADbyMissing}" == "true", out.vcf = "filtered.vcf.gz") RSCRIPT >>> runtime { - docker:"cristaniguti/reads2map:0.0.4" - singularity:"docker://cristaniguti/reads2map:0.0.4" + docker:"cristaniguti/reads2map:0.0.8" + singularity:"docker://cristaniguti/reads2map:0.0.8" cpu:1 # Cloud memory:"~{memory_size} MiB" @@ -906,8 +957,8 @@ task QualPlots { >>> runtime { - docker: "cristaniguti/reads2map:0.0.4" - singularity:"docker://cristaniguti/reads2map:0.0.4" + docker: "cristaniguti/reads2map:0.0.8" + singularity:"docker://cristaniguti/reads2map:0.0.8" cpu: 1 # Cloud memory:"~{memory_size} MiB" @@ -1020,8 +1071,8 @@ task QualPlotsForHardFilteringSimulated { >>> runtime { - docker: "cristaniguti/reads2map:0.0.4" - singularity:"docker://cristaniguti/reads2map:0.0.4" + docker: "cristaniguti/reads2map:0.0.8" + singularity:"docker://cristaniguti/reads2map:0.0.8" cpu: 1 # Cloud memory:"~{memory_size} MiB" @@ -1067,8 +1118,8 @@ task FilterMulti { >>> runtime { - docker:"cristaniguti/reads2map:0.0.4" - singularity:"docker://cristaniguti/reads2map:0.0.4" + docker:"cristaniguti/reads2map:0.0.8" + singularity:"docker://cristaniguti/reads2map:0.0.8" cpu: 1 # Cloud memory:"~{memory_size} MiB" diff --git a/tests/subworkflows/genotyping_empirical/freebayes/polyrad/inputs.json b/tests/subworkflows/genotyping_empirical/freebayes/polyrad/inputs.json index 21a2603..4658462 100644 --- a/tests/subworkflows/genotyping_empirical/freebayes/polyrad/inputs.json +++ b/tests/subworkflows/genotyping_empirical/freebayes/polyrad/inputs.json @@ -10,5 +10,9 @@ "onemapMapsEmp.multiallelics": true, "onemapMapsEmp.parent1": "PT_F", "onemapMapsEmp.chromosome": "Chr10", - "onemapMapsEmp.vcf_file": "tests/data/Ptremula_PRJNA395596_subset/splitted_vcfs/biallelics_gatk.vcf.gz" + "onemapMapsEmp.vcf_file": 
"tests/data/Ptremula_PRJNA395596_subset/splitted_vcfs/biallelics_gatk.vcf.gz", + "onemapMapsEmp.prob_thres": 0.8, + "onemapMapsEmp.global_errors": ["0.001", "0.05"], + "onemapMapsEmp.genoprob_error": true, + "onemapMapsEmp.genoprob_global_errors": ["0.0001", "0.01"] } \ No newline at end of file diff --git a/tests/subworkflows/genotyping_empirical/freebayes/polyrad/test.yaml b/tests/subworkflows/genotyping_empirical/freebayes/polyrad/test.yaml index e758eee..79a7717 100644 --- a/tests/subworkflows/genotyping_empirical/freebayes/polyrad/test.yaml +++ b/tests/subworkflows/genotyping_empirical/freebayes/polyrad/test.yaml @@ -3,8 +3,3 @@ - regenotyping - genotyping_empirical command: miniwdl run -i tests/subworkflows/genotyping_empirical/freebayes/polyrad/inputs.json subworkflows/genotyping_empirical.wdl - files: - - path: _LAST/out/maps_report/1/freebayes_VCF_polyrad0.05_map_report.tsv.gz - md5sum: 83ef1b8c3f2209d71019149a5cf8ac03 - - path: _LAST/out/maps_report/0/freebayes_VCF_polyrad_map_report.tsv.gz - md5sum: d0a3acceb2f4d233437082758e11f8c7 \ No newline at end of file diff --git a/tests/subworkflows/genotyping_empirical/freebayes/supermassa/inputs.json b/tests/subworkflows/genotyping_empirical/freebayes/supermassa/inputs.json index 872c375..ca73a1c 100644 --- a/tests/subworkflows/genotyping_empirical/freebayes/supermassa/inputs.json +++ b/tests/subworkflows/genotyping_empirical/freebayes/supermassa/inputs.json @@ -10,5 +10,9 @@ "onemapMapsEmp.multiallelics": true, "onemapMapsEmp.parent1": "PT_F", "onemapMapsEmp.chromosome": "Chr10", - "onemapMapsEmp.vcf_file": "tests/data/Ptremula_PRJNA395596_subset/splitted_vcfs/biallelics_gatk.vcf.gz" + "onemapMapsEmp.vcf_file": "tests/data/Ptremula_PRJNA395596_subset/splitted_vcfs/biallelics_gatk.vcf.gz", + "onemapMapsEmp.prob_thres": 0.8, + "onemapMapsEmp.global_errors": ["0.001", "0.05"], + "onemapMapsEmp.genoprob_error": true, + "onemapMapsEmp.genoprob_global_errors": ["0.0001", "0.01"] } \ No newline at end of file diff 
--git a/tests/subworkflows/genotyping_empirical/freebayes/supermassa/test.yaml b/tests/subworkflows/genotyping_empirical/freebayes/supermassa/test.yaml index a860f97..9d918ff 100644 --- a/tests/subworkflows/genotyping_empirical/freebayes/supermassa/test.yaml +++ b/tests/subworkflows/genotyping_empirical/freebayes/supermassa/test.yaml @@ -2,9 +2,4 @@ tags: - regenotyping - genotyping_empirical - command: miniwdl run -i tests/subworkflows/genotyping_empirical/freebayes/supermassa/inputs.json subworkflows/genotyping_empirical.wdl - files: - - path: _LAST/out/maps_report/1/freebayes_VCF_supermassa0.05_map_report.tsv.gz - md5sum: 96cd6a8147dac759bb3b0ab56ce12ad7 - - path: _LAST/out/maps_report/0/freebayes_VCF_supermassa_map_report.tsv.gz - md5sum: aef9ef4f5a2962f5972657e6a336259f \ No newline at end of file + command: miniwdl run -i tests/subworkflows/genotyping_empirical/freebayes/supermassa/inputs.json subworkflows/genotyping_empirical.wdl \ No newline at end of file diff --git a/tests/subworkflows/genotyping_empirical/freebayes/updog/inputs.json b/tests/subworkflows/genotyping_empirical/freebayes/updog/inputs.json index 0966b51..184c722 100644 --- a/tests/subworkflows/genotyping_empirical/freebayes/updog/inputs.json +++ b/tests/subworkflows/genotyping_empirical/freebayes/updog/inputs.json @@ -10,5 +10,9 @@ "onemapMapsEmp.multiallelics": true, "onemapMapsEmp.parent1": "PT_F", "onemapMapsEmp.chromosome": "Chr10", - "onemapMapsEmp.vcf_file": "tests/data/Ptremula_PRJNA395596_subset/splitted_vcfs/biallelics_gatk.vcf.gz" + "onemapMapsEmp.vcf_file": "tests/data/Ptremula_PRJNA395596_subset/splitted_vcfs/biallelics_gatk.vcf.gz", + "onemapMapsEmp.prob_thres": 0.8, + "onemapMapsEmp.global_errors": ["0.001", "0.05"], + "onemapMapsEmp.genoprob_error": true, + "onemapMapsEmp.genoprob_global_errors": ["0.0001", "0.01"] } \ No newline at end of file diff --git a/tests/subworkflows/genotyping_empirical/freebayes/updog/test.yaml 
b/tests/subworkflows/genotyping_empirical/freebayes/updog/test.yaml index 5119fef..e471c8b 100644 --- a/tests/subworkflows/genotyping_empirical/freebayes/updog/test.yaml +++ b/tests/subworkflows/genotyping_empirical/freebayes/updog/test.yaml @@ -2,9 +2,4 @@ tags: - regenotyping - genotyping_empirical - command: miniwdl run -i tests/subworkflows/genotyping_empirical/freebayes/updog/inputs.json subworkflows/genotyping_empirical.wdl - files: - - path: _LAST/out/maps_report/1/freebayes_VCF_updog0.05_map_report.tsv.gz - md5sum: 71bc4410991efd0bb7e8ec2414ba17f1 - - path: _LAST/out/maps_report/0/freebayes_VCF_updog_map_report.tsv.gz - md5sum: 6403f838779e405f357089f9a8a66ac0 \ No newline at end of file + command: miniwdl run -i tests/subworkflows/genotyping_empirical/freebayes/updog/inputs.json subworkflows/genotyping_empirical.wdl \ No newline at end of file