From 3381ab3f4e5df40ef8752d04cb98372699194176 Mon Sep 17 00:00:00 2001 From: cristianetaniguti Date: Thu, 30 Nov 2023 15:14:45 -0600 Subject: [PATCH 1/9] add mappoly new functions --- .dockerfiles/reads2map/Dockerfile | 3 +- README.md | 4 + tasks/BWA.wdl | 2 +- tasks/mappoly.wdl | 140 ++++++++++++------------------ 4 files changed, 63 insertions(+), 86 deletions(-) diff --git a/.dockerfiles/reads2map/Dockerfile b/.dockerfiles/reads2map/Dockerfile index e7406ae..867b9a4 100644 --- a/.dockerfiles/reads2map/Dockerfile +++ b/.dockerfiles/reads2map/Dockerfile @@ -53,10 +53,11 @@ RUN Rscript -e 'remotes::install_version("gsalib",upgrade="never", version = "2. RUN Rscript -e 'remotes::install_github("tpbilton/GUSbase", ref = "92119b9c57faa7abeede8236d24a4a8e85fb3df7")' RUN Rscript -e 'remotes::install_github("tpbilton/GUSMap", ref = "4d7d4057049819d045750d760a45976c8f30dac6")' -RUN Rscript -e 'remotes::install_github("dcgerard/updog", ref="f1")' +RUN Rscript -e 'remotes::install_github("dcgerard/updog")' RUN Rscript -e 'remotes::install_github("Cristianetaniguti/onemap")' # Still privates RUN Rscript -e 'remotes::install_github("Cristianetaniguti/simuscopR")' RUN Rscript -e 'remotes::install_github("Cristianetaniguti/Reads2MapTools")' +RUN Rscript -e 'remotes::install_github("mmollin/MAPpoly")' diff --git a/README.md b/README.md index 5cddbfb..43be2ad 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,10 @@ Taniguti, C. H.; Taniguti, L. M.; Amadeu, R. R.; Lau, J.; de Siqueira Gesteira, - [simuscopR](https://github.com/Cristianetaniguti/simuscopR) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Wrap-up R package for SimusCop simulations - [MAPpoly](https://github.com/mmollina/MAPpoly) in [cristaniguti/reads2map:0.0.5](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Build linkage maps for autopolyploid species +### How to cite + +Taniguti, C. H.; Taniguti, L. M.; Amadeu, R. R.; Lau, J.; de Siqueira Gesteira, G.; Oliveira, T. de P.; Ferreira, G. C.; Pereira, G. da S.; Byrne, D.; Mollinari, M.; Riera-Lizarazu, O.; Garcia, A. A. F. Developing best practices for genotyping-by-sequencing analysis in the construction of linkage maps. GigaScience, 12, giad092. https://doi.org/10.1093/gigascience/giad092 + ### Funding This work was partially supported by the National Council for Scientific and Technological Development (CNPq - 313269/2021-1); by USDA, National Institute of Food and Agriculture (NIFA), Specialty Crop Research Initiative (SCRI) project “Tools for Genomics Assisted Breeding in Polyploids: Development of a Community Resource” (Award No. 2020-51181-32156); and by the Bill and Melinda Gates Foundation (OPP1213329) project SweetGAINS. \ No newline at end of file diff --git a/tasks/BWA.wdl b/tasks/BWA.wdl index fe85c62..23b38cd 100644 --- a/tasks/BWA.wdl +++ b/tasks/BWA.wdl @@ -48,7 +48,7 @@ task RunBwaAlignment { SORT_ORDER=coordinate \ CREATE_INDEX=true; mv "${sampleName_list[$index]}.${lib_list[$index]}.sorted.bai" "${sampleName_list[$index]}.${lib_list[$index]}.sorted.bam.bai"; - BAMS+=("I=${sampleName_list[$index]}.${lib_list[$index]}.sorted.bam") + BAMS+=("I=${sampleName_list[$index]}.${lib_list[$index]}.sorted.bam")x` done sampleName_unique=($(echo "${sampleName_list[@]}" | tr ' ' '\n' | sort -u | tr '\n' ' ')) diff --git a/tasks/mappoly.wdl b/tasks/mappoly.wdl index 377d2d5..00b48d3 100644 --- a/tasks/mappoly.wdl +++ b/tasks/mappoly.wdl @@ -29,15 +29,14 @@ task MappolyReport { parent.2 = "~{parent2}", verbose = FALSE, read.geno.prob = TRUE, - prob.thres = prob.thres, ploidy = ~{ploidy}) - - png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_raw_data.png")) - plot(dat) - dev.off() + prob.thres = prob.thres, + ploidy = ~{ploidy}) dat <- filter_missing(input.data = dat, type = "marker", filter.thres = 0.25, inter = FALSE) + dat <- filter_missing(dat, type = 'individual', filter.thres = 0.1, inter = FALSE) + if("~{filt_segr}"){ pval.bonf <- 0.05/dat[[3]] mrks.chi.filt <- filter_segregation(dat, @@ -48,85 +47,58 @@ task MappolyReport { } else { seq.init <- make_seq_mappoly(dat, "all") } - - png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_","filters.png")) - plot(seq.init) - dev.off() - - all.rf.pairwise <- est_pairwise_rf(input.seq = seq.init, ncpus = ~{max_cores}) - mat <- rf_list_to_matrix(input.twopt = all.rf.pairwise) - - id<-get_genomic_order(seq.init) - s.o <- make_seq_mappoly(id) - - png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_","rf.png")) - plot(mat, ord = s.o[[3]]) - dev.off() - - tpt <- make_pairs_mappoly(all.rf.pairwise, input.seq = s.o) - temp2 <- rf_snp_filter(input.twopt = tpt, diagnostic.plot = FALSE) - lgtemp <- get_genomic_order(temp2) - s.o <- make_seq_mappoly(lgtemp) - - png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_","rf.filt.png")) - plot(mat, ord = s.o[[3]]) - dev.off() - - # est.map <- est_rf_hmm_sequential(input.seq = s.o, - # start.set = 5, - # thres.twopt = 10, - # thres.hmm = 50, - # extend.tail = 30, - # twopt = all.rf.pairwise, - # verbose = F, - # phase.number.limit = 20, - # sub.map.size.diff.limit = 5) - - # map.err <- est_full_hmm_with_global_error(input.map = est.map, error = 0.05) - # map.prob <- est_full_hmm_with_prior_prob(input.map = est.map, dat.prob = dat) - - # png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_no_error_cMbyMb.png")) - # plot_genome_vs_map(est.map, same.ch.lg = TRUE) - # dev.off() - - # png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"global_error_cMbyMb.png")) - # plot_genome_vs_map(map.err, same.ch.lg = TRUE) - # dev.off() - - # png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_probs_cMbyMb.png")) - # plot_genome_vs_map(map.prob, same.ch.lg = TRUE) - # dev.off() - - # summary <- summary_maps(list(est.map, map.err, map.prob)) - # summary <- cbind(method = c("no_error", "global_error", "probs", "-"), summary) - - # write.csv(summary, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_map_summary.csv")) - - # export_map_list(est.map, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_no_error_","map_file.csv")) - # export_map_list(map.err, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_global_error_","map_file.csv")) - # export_map_list(map.prob, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_probs_","map_file.csv")) - - # png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_map_draw.png")) - # plot_map_list(list(default = est.map, - # global = map.err, - # probs = map.prob), col = "ggstyle") - # dev.off() - - # genoprob <- calc_genoprob_error(input.map = est.map, error = 0) - # genoprob.err <- calc_genoprob_error(input.map = map.err, error = 0.05) - # genoprob.prob <- calc_genoprob_dist(input.map = map.prob, dat.prob = dat) - - # homoprobs = calc_homologprob(genoprob) - # homoprobs.err = calc_homologprob(genoprob.err) - # homoprobs.prob = calc_homologprob(genoprob.prob) - - # save(homoprobs, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.RData")) - # save(homoprobs.err, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.err.RData")) - # save(homoprobs.prob, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.prob.RData")) + + # Estimate two-point recombination fraction + tpt <- est_pairwise_rf(input.seq = seq.init, ncpus = ~{max_cores}) + mat <- rf_list_to_matrix(input.twopt = tpt) + + # Filter markers by recombination fraction values + seq.filt <- rf_snp_filter(input.twopt = tpt, diagnostic.plot = FALSE, probs = c(0.05, 0.95)) + mat2 <- make_mat_mappoly(mat, seq.filt) + + # Run MDS for ordering according to recombination fractions + seq_test_mds <- mds_mappoly(mat2) + seq_mds <- make_seq_mappoly(seq_test_mds) + + # Sequence with genomic order + geno_order <- get_genomic_order(seq_mds) + seq_geno_order <- make_seq_mappoly(geno_order) + + init.map.list <- framework_map(input.seq = seq_geno_order, + twopt = tpt, + start.set = 5, + inflation.lim.p1 = 10, + inflation.lim.p2 = 10, + verbose = FALSE) + + res <- update_framework_map(input.map.list = init.map.list, + input.seq = seq_geno_order, + twopt = tpt, + thres.twopt = 5, + init.LOD = 100, + max.rounds = 3, + size.rem.cluster = 3, + gap.threshold = 3, + verbose = FALSE) + + # Get last interaction + iter <- length(res[[2]][[1]]) + map_error <- est_full_hmm_with_global_error(res[[2]][[1]][[iter]], error = 0.05, verbose = FALSE) + map_prob <- est_full_hmm_with_prior_prob(res[[2]][[1]][[iter]], dat.prob = dat, verbose = FALSE) + + # Diagnostic graphics - overall from plot(dat) + # Heatmap of mds with plot(mat2, ord=seq_mds) + # Heatmap of genomic order order plot(mat2, ord = map_error$info$mk.names) + # Relation between mds and genome plot(seq_mds$genome.pos) + + saveRDS(dat, file= "~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_dat.rds") + saveRDS(mat2, file="~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_mat2.rds") + saveRDS(seq_mds, file="~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_seq_mds.rds") + saveRDS(map_error, file="~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_map_error.rds") + saveRDS(map_prob, file= "~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_map_prob.rds") system("mkdir results") - #system("mv *.png *.RData *csv results") - system("mv *.png results") + system("mv *.rds results") system(paste0("tar -czvf ", "~{SNPCall_program}", "_", "~{GenotypeCall_program}", "_", "~{CountsFrom}","_results.tar.gz results")) @@ -135,8 +107,8 @@ task MappolyReport { >>> runtime { - docker:"cristaniguti/reads2map:0.0.5" - singularity: "docker://cristaniguti/reads2map:0.0.5" + docker:"cristaniguti/reads2map:0.0.9" + singularity: "docker://cristaniguti/reads2map:0.0.9" cpu: max_cores # Cloud memory:"~{memory_size} MiB" From 8d3e2e86483fcd0650dda0e0e3dfeb5b1d82d914 Mon Sep 17 00:00:00 2001 From: cristianetaniguti Date: Thu, 30 Nov 2023 16:30:49 -0600 Subject: [PATCH 2/9] adapt tests --- tasks/mappoly.wdl | 4 ++-- tests/data/polyploid/fastq/samples_info.txt | 4 ---- .../fastq/{ => single_end}/1.fastq.gz | Bin .../fastq/{ => single_end}/98.fastq.gz | Bin .../fastq/{ => single_end}/P1.fastq.gz | Bin .../fastq/{ => single_end}/P2.fastq.gz | Bin .../fastq/single_end/samples_info.txt | 4 ++++ .../inputs_poly_pair_end.json | 3 ++- .../inputs_poly_single_end.json | 18 ++++++++++++++++++ .../test_poly_single_end.yaml | 4 ++++ 10 files changed, 30 insertions(+), 7 deletions(-) delete mode 100644 tests/data/polyploid/fastq/samples_info.txt rename tests/data/polyploid/fastq/{ => single_end}/1.fastq.gz (100%) rename tests/data/polyploid/fastq/{ => single_end}/98.fastq.gz (100%) rename tests/data/polyploid/fastq/{ => single_end}/P1.fastq.gz (100%) rename tests/data/polyploid/fastq/{ => single_end}/P2.fastq.gz (100%) create mode 100644 tests/data/polyploid/fastq/single_end/samples_info.txt create mode 100644 tests/subworkflows/create_alignment_from_families_files/inputs_poly_single_end.json create mode 100644 tests/subworkflows/create_alignment_from_families_files/test_poly_single_end.yaml diff --git a/tasks/mappoly.wdl b/tasks/mappoly.wdl index 377d2d5..5fd6b3e 100644 --- a/tasks/mappoly.wdl +++ b/tasks/mappoly.wdl @@ -128,7 +128,7 @@ task MappolyReport { #system("mv *.png *.RData *csv results") system("mv *.png results") - system(paste0("tar -czvf ", "~{SNPCall_program}", "_", "~{GenotypeCall_program}", "_", "~{CountsFrom}","_results.tar.gz results")) + system(paste0("tar -czvf ", "~{SNPCall_program}", "_", "~{GenotypeCall_program}", "_", "~{CountsFrom}","_poly_results.tar.gz results")) RSCRIPT @@ -154,6 +154,6 @@ task MappolyReport { } output { - File results = "~{SNPCall_program}_~{GenotypeCall_program}Poly_~{CountsFrom}_results.tar.gz" + File results = "~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_poly_results.tar.gz" } } \ No newline at end of file diff --git a/tests/data/polyploid/fastq/samples_info.txt b/tests/data/polyploid/fastq/samples_info.txt deleted file mode 100644 index 9e13e9d..0000000 --- a/tests/data/polyploid/fastq/samples_info.txt +++ /dev/null @@ -1,4 +0,0 @@ -tests/data/polyploid/fastq/1.fastq.gz 1 1 -tests/data/polyploid/fastq/98.fastq.gz 98 98 -tests/data/polyploid/fastq/P1.fastq.gz P1 P1 -tests/data/polyploid/fastq/P2.fastq.gz P2 P2 diff --git a/tests/data/polyploid/fastq/1.fastq.gz b/tests/data/polyploid/fastq/single_end/1.fastq.gz similarity index 100% rename from tests/data/polyploid/fastq/1.fastq.gz rename to tests/data/polyploid/fastq/single_end/1.fastq.gz diff --git a/tests/data/polyploid/fastq/98.fastq.gz b/tests/data/polyploid/fastq/single_end/98.fastq.gz similarity index 100% rename from tests/data/polyploid/fastq/98.fastq.gz rename to tests/data/polyploid/fastq/single_end/98.fastq.gz diff --git a/tests/data/polyploid/fastq/P1.fastq.gz b/tests/data/polyploid/fastq/single_end/P1.fastq.gz similarity index 100% rename from tests/data/polyploid/fastq/P1.fastq.gz rename to tests/data/polyploid/fastq/single_end/P1.fastq.gz diff --git a/tests/data/polyploid/fastq/P2.fastq.gz b/tests/data/polyploid/fastq/single_end/P2.fastq.gz similarity index 100% rename from tests/data/polyploid/fastq/P2.fastq.gz rename to tests/data/polyploid/fastq/single_end/P2.fastq.gz diff --git a/tests/data/polyploid/fastq/single_end/samples_info.txt b/tests/data/polyploid/fastq/single_end/samples_info.txt new file mode 100644 index 0000000..76f3bec --- /dev/null +++ b/tests/data/polyploid/fastq/single_end/samples_info.txt @@ -0,0 +1,4 @@ +tests/data/polyploid/fastq/single_end/1.fastq.gz 1 1 +tests/data/polyploid/fastq/single_end/98.fastq.gz 98 98 +tests/data/polyploid/fastq/single_end/P1.fastq.gz P1 P1 +tests/data/polyploid/fastq/single_end/P2.fastq.gz P2 P2 diff --git a/tests/subworkflows/create_alignment_from_families_files/inputs_poly_pair_end.json b/tests/subworkflows/create_alignment_from_families_files/inputs_poly_pair_end.json index 0312143..261f9a6 100644 --- a/tests/subworkflows/create_alignment_from_families_files/inputs_poly_pair_end.json +++ b/tests/subworkflows/create_alignment_from_families_files/inputs_poly_pair_end.json @@ -4,6 +4,7 @@ "CreateAlignmentFromFamilies.gatk_mchap": false, "CreateAlignmentFromFamilies.families_info": "tests/data/polyploid/fastq/pair_end/samples_info.txt", "CreateAlignmentFromFamilies.chunk_size": 2, + "CreateAlignmentFromFamilies.pair_end": false, "CreateAlignmentFromFamilies.references": { "ref_fasta": "tests/data/polyploid/RchinensisV1.0/Chr04_sub.fasta", "ref_dict": "tests/data/polyploid/RchinensisV1.0/Chr04_sub.dict", @@ -14,4 +15,4 @@ "ref_fasta_index": "tests/data/polyploid/RchinensisV1.0/Chr04_sub.fasta.fai", "ref_pac": "tests/data/polyploid/RchinensisV1.0/Chr04_sub.fasta.pac" } - } \ No newline at end of file + } diff --git a/tests/subworkflows/create_alignment_from_families_files/inputs_poly_single_end.json b/tests/subworkflows/create_alignment_from_families_files/inputs_poly_single_end.json new file mode 100644 index 0000000..7a43de0 --- /dev/null +++ b/tests/subworkflows/create_alignment_from_families_files/inputs_poly_single_end.json @@ -0,0 +1,18 @@ +{ + "CreateAlignmentFromFamilies.max_cores": 2, + "CreateAlignmentFromFamilies.rm_dupli": false, + "CreateAlignmentFromFamilies.gatk_mchap": false, + "CreateAlignmentFromFamilies.families_info": "tests/data/polyploid/fastq/single_end/samples_info.txt", + "CreateAlignmentFromFamilies.chunk_size": 2, + "CreateAlignmentFromFamilies.pair_end": false, + "CreateAlignmentFromFamilies.references": { + "ref_fasta": "tests/data/polyploid/RchinensisV1.0/Chr04_sub.fasta", + "ref_dict": "tests/data/polyploid/RchinensisV1.0/Chr04_sub.dict", + "ref_ann": "tests/data/polyploid/RchinensisV1.0/Chr04_sub.fasta.ann", + "ref_sa": "tests/data/polyploid/RchinensisV1.0/Chr04_sub.fasta.sa", + "ref_amb": "tests/data/polyploid/RchinensisV1.0/Chr04_sub.fasta.amb", + "ref_bwt": "tests/data/polyploid/RchinensisV1.0/Chr04_sub.fasta.bwt", + "ref_fasta_index": "tests/data/polyploid/RchinensisV1.0/Chr04_sub.fasta.fai", + "ref_pac": "tests/data/polyploid/RchinensisV1.0/Chr04_sub.fasta.pac" + } + } diff --git a/tests/subworkflows/create_alignment_from_families_files/test_poly_single_end.yaml b/tests/subworkflows/create_alignment_from_families_files/test_poly_single_end.yaml new file mode 100644 index 0000000..36d539d --- /dev/null +++ b/tests/subworkflows/create_alignment_from_families_files/test_poly_single_end.yaml @@ -0,0 +1,4 @@ +- name: Align empirical FASTQ reads to a reference genome + tags: + - create_alignment_from_families_files + command: miniwdl run -i tests/subworkflows/create_alignment_from_families_files/inputs_poly_single_end.json subworkflows/create_alignment_from_families_files.wdl From 692599529e750803fd2f693e2d9607d7df50b12f Mon Sep 17 00:00:00 2001 From: cristianetaniguti Date: Thu, 30 Nov 2023 19:50:37 -0600 Subject: [PATCH 3/9] tassel and stacks also for poly --- .configurations/cromwell_no_mysql.conf | 2 +- tasks/BWA.wdl | 2 +- tasks/bcftools.wdl | 8 ++++---- tasks/tassel.wdl | 8 ++++---- .../vcfs/stacks_norm_example_single_end.vcf.gz | Bin 0 -> 6764 bytes .../vcfs/tassel_norm_example_single_end.vcf.gz | Bin 0 -> 3687 bytes .../EmpiricalSNPCalling/inputs_poly.json | 12 ++++++++---- 7 files changed, 18 insertions(+), 14 deletions(-) create mode 100644 tests/data/polyploid/vcfs/stacks_norm_example_single_end.vcf.gz create mode 100644 tests/data/polyploid/vcfs/tassel_norm_example_single_end.vcf.gz diff --git a/.configurations/cromwell_no_mysql.conf b/.configurations/cromwell_no_mysql.conf index 01558c6..db4f1a1 100644 --- a/.configurations/cromwell_no_mysql.conf +++ b/.configurations/cromwell_no_mysql.conf @@ -1,5 +1,5 @@ backend { - default = SlurmSingularity + default = Local providers { diff --git a/tasks/BWA.wdl b/tasks/BWA.wdl index 23b38cd..fe85c62 100644 --- a/tasks/BWA.wdl +++ b/tasks/BWA.wdl @@ -48,7 +48,7 @@ task RunBwaAlignment { SORT_ORDER=coordinate \ CREATE_INDEX=true; mv "${sampleName_list[$index]}.${lib_list[$index]}.sorted.bai" "${sampleName_list[$index]}.${lib_list[$index]}.sorted.bam.bai"; - BAMS+=("I=${sampleName_list[$index]}.${lib_list[$index]}.sorted.bam")x` + BAMS+=("I=${sampleName_list[$index]}.${lib_list[$index]}.sorted.bam") done sampleName_unique=($(echo "${sampleName_list[@]}" | tr ' ' '\n' | sort -u | tr '\n' ' ')) diff --git a/tasks/bcftools.wdl b/tasks/bcftools.wdl index 350cea9..1a1ce3a 100644 --- a/tasks/bcftools.wdl +++ b/tasks/bcftools.wdl @@ -60,9 +60,9 @@ task FixTasselVCF { command <<< sed 's/PL,Number=./PL,Number=G/g' ~{vcf_file} > tassel_fix.vcf - sed 's/AD,Number=./AD,Number=R/g' tassel_fix.vcf > tassel_fix.vcf - sed 's/AF,Number=./AF,Number=A/g' tassel_fix.vcf > tassel_fix.vcf - sed '/INFO=' tassel_fix.vcf > tassel_fix.vcf + sed -i 's/AD,Number=./AD,Number=R/g' tassel_fix.vcf + sed -i 's/AF,Number=./AF,Number=A/g' tassel_fix.vcf + sed -i '/INFO=' tassel_fix.vcf grep ">" ~{reference} > chrs sed -i 's/>//' chrs @@ -97,6 +97,6 @@ task FixTasselVCF { } output { - File vcf_fixed = "tassel_fix_chr.vcf" + File vcf_fixed = "tassel_fix_chr.vcf.gz" } } \ No newline at end of file diff --git a/tasks/tassel.wdl b/tasks/tassel.wdl index bf8b28a..deb8a99 100644 --- a/tasks/tassel.wdl +++ b/tasks/tassel.wdl @@ -212,8 +212,8 @@ task TasselBeforeAlign { } Int disk_size = ceil(size(fastq, "GiB")) - Int memory_min = ceil(max_ram/2) - Int memory_max = max_ram - 5000 + Int memory_min = ceil(max_ram/3) + Int memory_max = max_ram command <<< @@ -266,8 +266,8 @@ task TasselAfterAlign { } Int disk_size = ceil(size(tassel_database, "GiB")) - Int memory_min = ceil(max_ram/2) - Int memory_max = max_ram - 5000 + Int memory_min = ceil(max_ram/3) + Int memory_max = max_ram command <<< diff --git a/tests/data/polyploid/vcfs/stacks_norm_example_single_end.vcf.gz b/tests/data/polyploid/vcfs/stacks_norm_example_single_end.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..21df4ad534d39adb6573bc17636c5a9955776b01 GIT binary patch literal 6764 zcmV-y8k6N8iwFb&00000{{{d;LjnL#8swVWa^uDkhOga*fEYd3x7=F{!)BSf3UcVksrsa4xzZotM ze>pz=VhZ)$>({4m-d_Cp>F_UaP7XgDouBtl%Ef3txm!+Vw}-Eerqc{}Ez9{LyBjVR z<<+ad;vO34WVkF3l~7tLA@R4x?0!Bf4_}=xhoj#X*%wvV_zHi2^X~LLkA8I0e|LX# zSf9Km!_es7H`b16sfR-T6~)PnTq~adtkuxto@Y>>m?M4b~g- zaHsD-{d{!s3)9H|EJ;IbhgCD*~k0gbh7+obC|QY z^)P3hOLJD)GsTYnYEunaygTE|`mrQO=k1$?fOegRHOcbT;~Rv78TY zuQrA_z8=npqvbA?o5?6k_hifKG8@h2^Kx-FyS>79X4T%nxc7?;w4ZMm+3c3T^l~&_ z&SulaueaEszhJ-7h_aBPP~TrK7t_gQo_^cB_IP%4gKs;ee`k3Z=(l`cjCjJ>%k^U+wk+(i5fr}GIS#wg2VCgi}#fr=T${_kEN|Mcno&%F=t&wFo9dY^tg?H#?n=zaYA(c4~1 zEIryyy|f>DvRKUK%krwYDd(T@zm!_at6mIUpM2=N=Y#6___KJgIQ2o~v2Vx*4e)?jd!yp&^ zTo?E)m$rz0<5FA~2DXpuwHxHvr_uN64pqd^@Yo0*sSSXRdS}}}kMqzE@%V~IIDCgM zj9R-?CLYnpn-K%JM-fdww}m6BibqPJg6IXz&<^qq3+V);wcbP`3_tJ$AqHuR8lx)A zRPl#O;Rhd^^sy^K=;vM*VVluOFyf6amGA*?Vck$zn7j>X84}lQSZ$vmqb5k90C};$ zkcJ^8LmurXq!wh!sMsa532UmV#{?YA9q zBNCzh%DD5byirN#A$p*AqUl1!CA$z!9GK9TH3ftye$ADzg*Qp2N>Tuxlqv(L!bn!C zMyo=^EyWpdJzmYklyR^Rw5RoSLxUX1wlI?Iib6Y)B_Tu8>X>+}EsXEe_rwA_GKp}K ze-#3$URzj9%0h>G^)PV7t76UTYhG-3BYF~YT!pQmWV-5uwVEKZJ#?Y4fW{Oq@^lbE z;Jok#nvNHR4>TL8Wnmp(fO^EJzIBxYU=lH10g@o#y7Hv>>EMV+jkuw(MKr0z!X;T1w^kjDnI4<8im~w3|(WT!PTu9MD~B*>Hbfi zfoJ<$y<-7nAyzBkcP)S`B+o#?w}o#_L9M1>1nTaP?10JGW(uAb0nG^0RpCEaP;Uli zh1G^XB7PxwBf^25%H=|sG;eL`)kH!pkHBTd!LF`>3MgQx4-obWN=kwp#hj4dPlgHnPuC|h&j2j7_4PxM1O)e6k;xRMajqDS6K># z3eIR+^edgF3rj&Xq;1XUxVwsxSa(^QzETJ^ysF_91$p%O0(_+fuh>VdXZWM9fVb=; zI@a(?wxuq)koR!UZFo=Hi^hOnHoN^hu;IPLPBb;U(+0H;jU-i9hTYBX)W9}_Bj>>< zu-9dk+%>W5z6#X^;Bu_vHFRf+69S73)f}qy6G<9y6@g1J?J5as@_wxvRSJGhN&&(K zsk1ea;AkR2cXbdt;t3|=S_K741V>Uqz(4Jd#xS7qrdvF^@bDW{-kx-F691rvOZTm+ zm7b}VBP(w=FLZUo4CTq~zSVY^<#`f5ZmbCZxyIVh9q6GJ})gd=699KhI)S6l%P1OlyM6Chq zG-((X<~X^6LHpQrJfIV`yhoB>w=%^%J5Wj3UFD|Xs3JLgvPVmr1Q1#HQYWZVMmU)5 z@DR5WNw`|EK!hky(;-;hiqA2!)W!*!Tub53YuYe2#js6GLD|HYfEJmu z?ak;FFl}Jd9_{(=Gm;uXv_0(BnV;z>14*YDe6OKnZW4LO-O&G9a%r6Gg>(u$Rd*?< z%2V9pWPXpIW~vtMi>a~~bJZ=VNlWkC#lT`{%7B!-MGKswTv+wr&YlQPWvC?)P1Okd z8#RBqh}i+2o;Q%Ysj0C^tuJ3uqqYTgx`6sU63Pn&jW=y8(cX!{NgY$T`;P zElS-aGn0fG=)O*0U57UZujsb7!S<~x3PH(2rrYWBR1lFeX=x6l43pb!Dcxo7#7&!q z9EE&?{RHobT*+#V+VN~|Ohp>@6)F}}G2p3ZkO~2a3bf;L_65D^|PcTd&^olLqO5Yp!KOqncN?aiC3$FKv;HEJ)^B( zNZkhZ4D7o)HJ!i+v_AEL5++MUM&GtA7oo0^a;7S^Roz%3V7QRe)qqUdV7GRj#`pks z?oVT3%NBNDV_`(eLU9R!Zg6~(fjza(Pr zbV|hLMe5b^5%ZGrpvB`KM7d&D@(6{<;ou_v##u1^XiC4AU*ob5Gvw1M|_eB0RWh)8$eB5Q}wW9 z(iS-O@v+%I)H!iUC@Y^ruA(+sbBN@S5~+y`5u~b<9)WXDg^U^MHecg;$R8`cPt(9- zdWze4o6l{jhpdj19n1n6EQuI_q9er(XtnjG^j5Wk!WIWPrte~?oWcbvV0o@|8WkHl zjR}#F)z&fI<8)jQrA1_I)o0B3I2^$G08IB1QPe3Uf>YPp(s`%wq!u=|r1bu*=XX{u zP+jRq>ZO4)$O?ZD>TZ$Y8YnfFD=B0pJq)0#r!Q^QjL>Q${?Opr>x6mr$bXGXURQCEl}?qsx~W>g)F zc+iLp`OHUT{-=mo96Gj(UEPL`@(i0ywURa(w~*gxHjAq5j$=>+N|UxbZ3%8A$w=8U z4S?f6J{xvbR6-k7a)~E5+MF#|G}!=a%`-bW|7qr0k|Vd3;JVN9IwtV{%?p$R39^5%#PkF$n_o$;`qF*eb0Ezy`mA4F|B%z*bd(=U|h84U3iSC|9CY zu>YSq3g*$iqCbBoXQ#Q-P~e0{4a#XGFAfzeKfDmV5pS+q46}VWSZ-cX+UZ}O=K^DJMRa_513%F~Z3bi}@bt%(g z5q^@U0UKj^tZ4;Mmq}JI!Jq+oF>8h`5ckLx3g+EN2MIdko2JGH`HruEAFxIWFUH(+ z1F9P;lbzHJAchew6YvF#rCQ|qJc$AzF&hZ=2a31cPY20g0QCwGB52qqDG>j?mO%xr z{_6SJ^ZD)U0D1TMew*a*JfTil9p6^ZFY^Aqf7|ohLtK65IQ%96MgZL*^4zB~jwP)C zv?>5X`ge9fJL~}RkQ-n~C zGD97Xq_zD8rYXq;Aba*`M`$=UfY!?$WH_c(M13=rHD@h77}fGwFX0p#{zG3`M?2wpV_wSlh>>o8zNtu^Rj z{(vtjzH-7BF5_cGMSMXt4k0yt{%XByvGs~^!vmTC^)@Ku5^uDCN~enkpkb%eKvbk~ zbBBzNqRHU1 zsD_c?J(Yzy#;BTOpF^wx6%wld5^lOb(7i$FjMDyz*_2OfZPcJ1<{d;=;~OFfEXe>U zfuX=?luMca?|K*n3~B{fPnTC<;a&yU(0&hW{~GKZ?I9xLt4E^s6u{Tw(qwP19!y84F9g81UCG?zaSC-+j zrkKkYt8oD}dq=3=_fs>nNM~p89X^o<1^X| zw?=266lEd3%&r_Dd)T7cmgk!%4LxkCAHH&v(V{jW!)(IU3${@zUQ5XUToo&FW4P=ciRAE&dqCdIk+v82*WJO5n)h#KWnUM9G7l zTgNL#F=ocRV1xu&Eksssmb;i&kTIzNj2gQDtNq6e7hM4EsWW9xjO9-J--!QhwP+b? z3Y3PDPBxy4rKQ$Cq(72U=v8HkvP}m#&RcA%y4Py}qK;NAK%ojO>?PhU!9Rjx@U$Az z4e>-!l?@eD=wxbMT%kf^Kv5>T%hboIMS^65krIt8#r7Fi#19?`!DTN~y0PJ$9$WPJ zJyot%{VAYs5xKM*9Jx`>0Aa^B{c7mj`(b{qi@7BLlAYqhV81PIRtHoRtHlCm@5IS< zzmo~|QD_D%6rfoXk~Z>oiv!Y8MFb_bFqT9A=_^#4OoiIvAzysw)G!cEq+w}Yx`H%} zP5_@b8PdircR-qH2Lg?lW@~DBhif{mG?4~6q+tw&m*temB1oUPKyrPZB`xz~qwi=; zRDfU&I`WXiD3@7K9E+1AlYt=CTUjB+5&&tft8VenL5h@E^cIaf2TCGN+s<}2B@xtw z#+yd4`*}(_65F<6iZf8!vG6E%#8adoQR1We?Y@s+SJMj8Ngum3ZvDN}faFCUo zu<^*>{-dlv$3*g^iV}Fo{LtZ#^4s)n5b|Ao(|OB-u-6kVnEtYkgR<)Lk| zS_|9?CMF~SGq^t-XF~NOH2R;8lRGpjFMUfte#J>G9zcUeqRyL+%&0(kB?{VVcAOyc z-r-P@=fJ!{TA>8@0F=sJ|GE*{&oGOwXWrJc9#c@1<)r8}mUGwf=*%P55Le3?6FQ9t zJvHV1nuzniaXbASmNElM9OLXDZe>!*x(fGJ@?=@fdxlRu)bZU2Tovl zubHG0oS{*r!&ZR9pSA3OK`m;}jbu~G8EhM4Kux@a6R2p=915BiZC&r&-9;X*9Rmxm z#lY<%Y#5~drw9L;%mqnrusTN|%-F;SDvtms<8TJxSJhn5Gwm)e!{E!E5g!s)TUo?uf7w6`{1IuM$dT{=; zYugs4%PjfE5(=v_0pqu;B9P<|Pow?-OeuPns;rIf2lTc$OoK0qHQ6j6XLaWse5pBc zFx_zg?$&iwo|P*nqz3V1{v|dH)=efhB)T8ePvoR+HmyLP8MXNc zEWkF7E;<`iyJohKXOZh4m%nxyiswk>*!*6Xg^v zv)P&@$WnbQ!6=W&qw+4e-MFmAWOi0210OFBa&mPFH$s+?`59n%S8*%GgEttn{JdNs zxqYh9?WGvanhYIgqvn9zKI9ba8aU2me#{6fT+MK~w{RV&8k0_nra7)KdXjJnSMq|Z zSAq5GHi7!Q5SJa}FUgObKBGYF&i zVyLqMj^C$mu5XwUbe7}xqo4J%v)eWlc;rFs z&||MR`cSG_<%<;n^x_1Nt~;v@0fJ|+a zHXLF_RV(Rh+UQk+j0LM!(bFYJ*&x;Lh4c^h6{0t+YXATriwFb&00000{{{d;LjnLB O00RI30000000025Jo6p^ literal 0 HcmV?d00001 diff --git a/tests/data/polyploid/vcfs/tassel_norm_example_single_end.vcf.gz b/tests/data/polyploid/vcfs/tassel_norm_example_single_end.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..ff400684c7c3eef83a2c3fffb293b783855b6752 GIT binary patch literal 3687 zcmV-t4w&&DiwFb&00000{{{d;LjnLw4&__jbKA%be^&nr&gG?@Qucm-=<3cW_QlOi z&T(S*`k0YrDppUnTuHuM?yoPfyZjKjT-tf4o#~7|AdyS{fCaDs7JT(;v0N34_2zcA zJNo7L&2Qp>{OQ%JH*eov{`B$aZ*NbI&Zid_`AM;zZK;ikmm?hdlcnJB4}Y0$vj@0Y2H%_3SO?(E`;R|QFFkp+X3VTvO9o^Qo5V% zN}StldFM`BZ?20?wpwm?L3)k*pPaYANALf!m|bTlc=PPf>$}z0FeM+;y9MxdD!{=@CfP$@BFQNc2aiCbRo{5I-tdwp?V} z<*rhotL4nkjYIqP?9F?J_3Yv?V)dtF>qT}kyS>L5`_~dgd@|e3_AmFiS-M>BW-C8U z`@^Si%Hh*q$xXfDzu6T3c>tCAx_^Z*B^UGcrWl?{Z=KopzL+l;ptf5a<}Ns2-|d#4 z-6fB2HbmfmSM$YgyXpy7na-FS5fq0Q^#{R^#ILV*Y?~ z+#X(>ogaIHI=rIfx=_Vz3?@Gxiy0;387b^oT=V%%3u3sOAENb*z3_uE;n2<>OmkKg zMvWH>DaJzD#n@gKX3Q4mN(o_A!HBnfqYwX_z8m@Vc7(P)^6qs+N9XWjBWwQT zreMUVvQ&;PU^W7IIXWI4jBtS-HJ!l``SIlZNLy(~rijGa{6I%{%%AL>BI<= zGa)CGSPma#!lcYe4xjEYRg~m{*a;)lc8(L^$CBKCxufn#HCkwsQ*9?o>iw||9+;?j z0P+;L=;y&u)8>H!2uVaPt>eMQco3B4Qn3lOy3HdF7n0{(fee68Gii_J+~k~+i4rEx z1Ls1g`05K2Xe_6R+h)MY_bSC!#zBIgsP@%l_R?rUg{AEkZ@QELY9 zVHI#BB5EUeF3>*WvyL!Ljo1Ls#jiA=Q3S(6E>Y3Dq8$nZ#) zr3!m>DuYXMno}0zu=#FsYin+;v(f~wIyyfJUpV`LCww`6{9#ZWeAoYIhA;QK0ff^E z2$07qhY%TqMmo>59(DTcKL^m>Fm?<&_fBu9=B?zJW}-u0F^Vd{;0r zzGvb#=j7at(slm^oC@+1qqb@9EDaqJb6P_mA!W%b#F9C@&!M4;%hIcQ zGHKpS7#2e!VReF!%2$d?L{{N~1)mWSK?IRQB+(m1Pz@RZ|FWJ~1_9d|yG^Sq6BsEx z7CQ~ot1TX(JTm)~7@PaAh=*bZXai24;|wX?flf&U9mu3(cxli%tDplpO&J?N7f;t1 zq40o;U`ruKS7~$0CY;e;jS;ojfjmxQxAtk+DWn;>|9ZNGF&ttHIpM@aOaZu}ILciUBGoo#qbkfm zPE+3T#bCCzeOl&xVn1d{;+RXsK{IT4mrDt!{$2lLHNN&27r{lTu#QY78OE|$?8sH_ zlE_#xOucM73vfuEl$qeW9OuUFCs9qLx<;zgPVE_ya%-6JE-a8qm1KC2==_2Tfq&K1 zxp>+zWK7O6Y-r-CkHWt`^9SW>9YztrQG=?(qdou;TTy`rGN}*%KCYY^^Gn3uT^|%D zIn+Ls0c}+1-x}_Xakq59SH!(h&T?EfQqWBe-3%;yFAF*+6?E=0=kG`AoL1V-U7VB( z@u1Z3P0&HF>v9Do>5l|YOfHhz&Kaw~19^5iIy=tKQVb~FZ?1udhQT)9evyX49Ku45 z+6LM>8NCr)hf>d>A<46u%gQ zAw45e$V#iyPy(4QtjD7)7fcZ!WpPpggOC<-8QD(X_e-gGB2@8UvYMxH-2fQsIDwqL zJb==)byKb_NByx~)cQe(PugKyY(>OP< zH7Bj1f<|Yd0bbYkqWMtW?InSSNK>;=yDI6l>*sNw+Oy$EeZbU-mm)4zxdc_yew@bE zIE{U=I5{U3PNdWOawZy+aGX+z*xVXLX=9%j!a3M}XPmWEp^X$Z0ydU$rE`~2ZJRho+M^4*fo&`>RdXohkX?`9yn9%5m9081D(!Vbabzi zyX?z!ZOCnKL)!5Os1Pn-;=)u9QV#oWDj@ZAvJ}jjiFsiLo*ycVZex)IEUrC+=a@Tn zBKfYCLLpMYnTjNEng&jjUM`*!rxrC%jXTEEmxWyv6pGS zU#_9YtvL;l|23g1QA1TC?alh2YUr(wK=oA_>o()WP)I=VGNgoB%|HbeO8Dhc8|Y;C zM>&bAEo4hDhcO+H*qbX^NC%;O)uXT0Eij^I^10g`uV6}-YJD1D7PSi0(CLe1XCTCH53PH*TuGuFzddOB(M z&yLkJunxG>XT_?eu%%o*o!sRCryI?snOlsaPhSw?GDB7Z;JxbmTfBLJh4+B|XrGZDPKP z=UO1tGH=G+5$}OO)Uew>gw(gcy5@iqYTH>syn|H<2eqV=MiuYMh-{KWQiEF*&@4H) z+(Rxtk=QN+g^V1#!y4eQ3R;3g`uK_4q)uG8uQO?fYM~6xTp7C3%-#qJ7YMAisPLqP z-B(Y>3_Ru*I%)$wu=C9OtVm2cyFsof1yczkW^BmbDHgM^MnU6s3Wrb28mpE=a(zCm z99o!l^#qxtlWm+#M9bzJd!ZX`oVe#)?mK%>4fB@xtb;qbLZRS!%koJ(PbYUnD2X?F zQ?8x9wc(TaT z2ei!BVn=$Qni|y(Fn_^2H))~iVuL~z*46t}SIQ7Mm2_Hw|66cM)!-bR-h0;3_GsbC z`MNd<%ah1(jKvx??pU-Z^}i06zLz*f001A02m}BC000301^_}s0stET0{{R300000 F008E46X*Z{ literal 0 HcmV?d00001 diff --git a/tests/pipelines/EmpiricalSNPCalling/inputs_poly.json b/tests/pipelines/EmpiricalSNPCalling/inputs_poly.json index 2298a04..d219225 100644 --- a/tests/pipelines/EmpiricalSNPCalling/inputs_poly.json +++ b/tests/pipelines/EmpiricalSNPCalling/inputs_poly.json @@ -1,14 +1,18 @@ { "SNPCalling.max_cores": 2, + "SNPCalling.max_ram": 2000, "SNPCalling.ploidy": 4, + "SNPCalling.enzyme": "NgoMIV", "SNPCalling.rm_dupli": false, "SNPCalling.replaceAD": false, - "SNPCalling.run_gatk": true, - "SNPCalling.run_freebayes": true, + "SNPCalling.run_tassel": true, + "SNPCalling.run_stacks": true, + "SNPCalling.run_gatk": false, + "SNPCalling.run_freebayes": false, "SNPCalling.hardfilters": true, "SNPCalling.n_chrom": 1, - "SNPCalling.chunk_size": 2, - "SNPCalling.samples_info": "tests/data/polyploid/fastq/samples_info.txt", + "SNPCalling.chunk_size": 1, + "SNPCalling.samples_info": "tests/data/polyploid/fastq/single_end/samples_info.txt", "SNPCalling.gatk_mchap": false, "SNPCalling.references": { "ref_fasta": "tests/data/polyploid/RchinensisV1.0/Chr04_sub.fasta", From f375da7606d7f048854c07490c6da352976d2673 Mon Sep 17 00:00:00 2001 From: cristianetaniguti Date: Fri, 1 Dec 2023 08:43:51 -0600 Subject: [PATCH 4/9] multiple global errors mappoly --- pipelines/EmpiricalMaps/EmpiricalMaps.wdl | 14 +++++++++----- subworkflows/mappoly_maps_empirical.wdl | 4 +++- tasks/mappoly.wdl | 14 ++++++++++---- .../polyploid/EmpiricalMaps_inputs.json | 13 ++++++++----- 4 files changed, 30 insertions(+), 15 deletions(-) diff --git a/pipelines/EmpiricalMaps/EmpiricalMaps.wdl b/pipelines/EmpiricalMaps/EmpiricalMaps.wdl index 90df7f2..590b780 100644 --- a/pipelines/EmpiricalMaps/EmpiricalMaps.wdl +++ b/pipelines/EmpiricalMaps/EmpiricalMaps.wdl @@ -189,7 +189,8 @@ workflow Maps { max_cores = max_cores, ploidy = ploidy, prob_thres = prob_thres, - filt_segr = filt_segr + filt_segr = filt_segr, + global_errors = global_errors } } @@ -206,7 +207,8 @@ workflow Maps { max_cores = max_cores, ploidy = ploidy, prob_thres = prob_thres, - filt_segr = filt_segr + filt_segr = filt_segr, + global_errors = global_errors } } @@ -223,7 +225,8 @@ workflow Maps { max_cores = max_cores, ploidy = ploidy, prob_thres = prob_thres, - filt_segr = filt_segr + filt_segr = filt_segr, + global_errors = global_errors } } @@ -237,9 +240,10 @@ workflow Maps { parent1 = dataset.parent1, parent2 = dataset.parent2, max_cores = max_cores, - ploidy = ploidy, + ploidy = ploidy, prob_thres = prob_thres, - filt_segr = filt_segr + filt_segr = filt_segr, + global_errors = global_errors } } } diff --git a/subworkflows/mappoly_maps_empirical.wdl b/subworkflows/mappoly_maps_empirical.wdl index 91dfefa..6c531f6 100644 --- a/subworkflows/mappoly_maps_empirical.wdl +++ b/subworkflows/mappoly_maps_empirical.wdl @@ -17,6 +17,7 @@ workflow MappolyMapsEmp { Int max_cores Int ploidy String? filt_segr + Array[String] global_errors } call utilsR.ReGenotyping { @@ -43,7 +44,8 @@ workflow MappolyMapsEmp { max_cores = max_cores, ploidy = ploidy, prob_thres = prob_thres, - filt_segr = filt_segr + filt_segr = filt_segr, + global_errors = global_errors } output { diff --git a/tasks/mappoly.wdl b/tasks/mappoly.wdl index 99f0cfb..a6d2c4c 100644 --- a/tasks/mappoly.wdl +++ b/tasks/mappoly.wdl @@ -12,6 +12,7 @@ task MappolyReport { Int max_cores Int ploidy String filt_segr = "TRUE" + Array[String] global_errors = ["0.05"] } Int disk_size = ceil(size(vcf_file, "GiB") * 2) @@ -22,14 +23,14 @@ task MappolyReport { library(mappoly) - if("~{GenotypeCall_program}" == "supermassa") prob.thres = ~{prob_thres} - 0.3 else prob.thres = ~{prob_thres} + if("~{GenotypeCall_program}" == "supermassa") prob.thres = ~{prob_thres} - ~{prob_thres}*0.3 else prob.thres = ~{prob_thres} dat <- read_vcf(file = "~{vcf_file}", parent.1 = "~{parent1}", parent.2 = "~{parent2}", verbose = FALSE, read.geno.prob = TRUE, - prob.thres = prob.thres, + prob.thres = ~{prob.thres}, ploidy = ~{ploidy}) dat <- filter_missing(input.data = dat, type = "marker", @@ -83,7 +84,13 @@ task MappolyReport { # Get last interaction iter <- length(res[[2]][[1]]) - map_error <- est_full_hmm_with_global_error(res[[2]][[1]][[iter]], error = 0.05, verbose = FALSE) + + global_errors <- unlist(strsplit("~{sep="," global_errors}", ",")) + map_error <- list() + for(i in 1:length(global_errors)){ + map_error[[i]] <- est_full_hmm_with_global_error(res[[2]][[1]][[iter]], error = as.numeric(global_errors[i]), verbose = FALSE) + saveRDS(map_error, file= paste0("~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_map_error_",global_errors[i],".rds")) + } map_prob <- est_full_hmm_with_prior_prob(res[[2]][[1]][[iter]], dat.prob = dat, verbose = FALSE) # Diagnostic graphics - overall from plot(dat) @@ -94,7 +101,6 @@ task MappolyReport { saveRDS(dat, file= "~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_dat.rds") saveRDS(mat2, file="~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_mat2.rds") saveRDS(seq_mds, file="~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_seq_mds.rds") - saveRDS(map_error, file="~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_map_error.rds") saveRDS(map_prob, file= "~{SNPCall_program}_~{GenotypeCall_program}_~{CountsFrom}_map_prob.rds") system("mkdir results") diff --git a/tests/pipelines/EmpiricalMaps/polyploid/EmpiricalMaps_inputs.json b/tests/pipelines/EmpiricalMaps/polyploid/EmpiricalMaps_inputs.json index e4210cf..4c82856 100644 --- a/tests/pipelines/EmpiricalMaps/polyploid/EmpiricalMaps_inputs.json +++ b/tests/pipelines/EmpiricalMaps/polyploid/EmpiricalMaps_inputs.json @@ -9,11 +9,14 @@ "multiallelics": "false" }, "Maps.max_cores": "2", - "Maps.gatk_mchap": "false", + "Maps.run_supermassa":true, + "Maps.run_updog":true, + "Maps.run_polyrad":true, + "Maps.gatk_mchap": "FALSE", "Maps.vcfs_counts_source": ["vcf"], "Maps.vcfs_software": ["gatk"], "Maps.filter_noninfo": "true", - "Maps.vcfs": ["gatk_Chr04_filt_example.vcf.gz"], - "Maps.replaceADbyMissing": "TRUE", - "Maps.prob_thres": 0.8 -} \ No newline at end of file + "Maps.vcfs": ["tests/data/polyploid/vcfs_norm/gatk_Chr04_filt_example.vcf.gz"], + "Maps.replaceADbyMissing": "TRUE", + "Maps.global_errors":["0.05"] +} From c41c4ff41b9755ee7894a34151756140f48a37d2 Mon Sep 17 00:00:00 2001 From: cristianetaniguti Date: Fri, 1 Dec 2023 10:30:15 -0600 Subject: [PATCH 5/9] poly new outputs for app --- .dockerfiles/reads2map/Dockerfile | 2 +- tasks/mappoly.wdl | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.dockerfiles/reads2map/Dockerfile b/.dockerfiles/reads2map/Dockerfile index 867b9a4..63ea5ca 100644 --- a/.dockerfiles/reads2map/Dockerfile +++ b/.dockerfiles/reads2map/Dockerfile @@ -60,4 +60,4 @@ RUN Rscript -e 'remotes::install_github("Cristianetaniguti/onemap")' # Still privates RUN Rscript -e 'remotes::install_github("Cristianetaniguti/simuscopR")' RUN Rscript -e 'remotes::install_github("Cristianetaniguti/Reads2MapTools")' -RUN Rscript -e 'remotes::install_github("mmollin/MAPpoly")' +RUN Rscript -e 'remotes::install_github("Cristianetaniguti/MAPpoly")' diff --git a/tasks/mappoly.wdl b/tasks/mappoly.wdl index a6d2c4c..edc53e2 100644 --- a/tasks/mappoly.wdl +++ b/tasks/mappoly.wdl @@ -30,7 +30,7 @@ task MappolyReport { parent.2 = "~{parent2}", verbose = FALSE, read.geno.prob = TRUE, - prob.thres = ~{prob.thres}, + prob.thres = prob.thres, ploidy = ~{ploidy}) dat <- filter_missing(input.data = dat, type = "marker", @@ -79,7 +79,7 @@ task MappolyReport { init.LOD = 100, max.rounds = 3, size.rem.cluster = 3, - gap.threshold = 3, + gap.threshold = 20, verbose = FALSE) # Get last interaction @@ -113,8 +113,8 @@ task MappolyReport { >>> runtime { - docker:"cristaniguti/reads2map:0.0.9" - singularity: "docker://cristaniguti/reads2map:0.0.9" + docker:"cristaniguti/reads2map:0.1.0" + singularity: "docker://cristaniguti/reads2map:0.1.0" cpu: max_cores # Cloud memory:"~{memory_size} MiB" From 97c5815bfe136ece5b2e222300d5013b81eee470 Mon Sep 17 00:00:00 2001 From: cristianetaniguti Date: Sat, 2 Dec 2023 15:24:48 -0600 Subject: [PATCH 6/9] bugfix --- tasks/JointReports.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/JointReports.wdl b/tasks/JointReports.wdl index 0bfa0b7..6b0bb25 100644 --- a/tasks/JointReports.wdl +++ b/tasks/JointReports.wdl @@ -33,7 +33,7 @@ task JointAllReports{ supermassaPolyMaps <- str_split("~{sep=';' supermassaPolyMaps}", ";", simplify = TRUE) all_files <- c(SNPCallerPolyMapsEmp, updogPolyMaps, polyradPolyMaps, supermassaPolyMaps) - all_files <- all_files[-which(all_files == "")] + if(length(which(all_files == "")) > 0) all_files <- all_files[-which(all_files == "")] system("mkdir results_all") From a495ee8774da840ff7bc9fe84c8dba3486d72267 Mon Sep 17 00:00:00 2001 From: cristianetaniguti Date: Sat, 2 Dec 2023 21:29:40 -0600 Subject: [PATCH 7/9] remove LargeList --- tasks/JointReports.wdl | 31 +++++++------- tasks/gusmap.wdl | 11 +++-- tasks/mappoly.wdl | 4 +- tasks/utilsR.wdl | 94 +++++++++++++++++++----------------------- 4 files changed, 64 insertions(+), 76 deletions(-) diff --git a/tasks/JointReports.wdl b/tasks/JointReports.wdl index 6b0bb25..4c791c6 100644 --- a/tasks/JointReports.wdl +++ b/tasks/JointReports.wdl @@ -64,7 +64,7 @@ task JointAllReports{ list_files <- untar(files[[i]][[j]], exdir = path_dir, list = T) system(paste0("mv ",path_dir, "/",list_files[1], "*_map_report.tsv.gz ", path_dir, "/maps")) system(paste0("mv ",path_dir, "/",list_files[1], "*_times_report.tsv.gz ", path_dir, "/times")) - system(paste0("mv ",path_dir, "/",list_files[1], "*.RData ", path_dir, "/RDatas")) + system(paste0("mv ",path_dir, "/",list_files[1], "*.rds ", path_dir, "/RDatas")) if(!grepl("gusmap", list_files[1])){ system(paste0("mv ",path_dir, "/",list_files[1], "*_filters_report.tsv.gz ", path_dir, "/filters")) system(paste0("mv ",path_dir, "/",list_files[1], "*_errors_report.tsv.gz ", path_dir, "/errors")) @@ -150,7 +150,6 @@ task JointReports{ library(tidyr) library(stringr) library(vroom) - library(largeList) SNPCaller <- str_split("~{sep=";" SNPCaller}", ";", simplify = T) updog <- str_split("~{sep=";" updog}", ";", simplify = T) @@ -168,7 +167,7 @@ task JointReports{ list_files <- untar(files[[i]][[j]], exdir = path_dir, list = T) system(paste0("mv ",path_dir, "/",list_files[1], "*_map_report.tsv.gz ", path_dir, "/maps")) system(paste0("mv ",path_dir, "/",list_files[1], "*_times_report.tsv.gz ", path_dir, "/times")) - system(paste0("mv ",path_dir, "/",list_files[1], "*.RData ", path_dir, "/RDatas")) + system(paste0("mv ",path_dir, "/",list_files[1], "*.rds ", path_dir, "/RDatas")) if(!grepl("gusmap", list_files[1])){ system(paste0("mv ",path_dir, "/",list_files[1], "*_filters_report.tsv.gz ", path_dir, "/filters")) system(paste0("mv ",path_dir, "/",list_files[1], "*_errors_report.tsv.gz ", path_dir, "/errors")) @@ -212,11 +211,11 @@ task JointReports{ class(RDatas[[i]]) <- "list" } - saveList(RDatas, file = "sequences_emp.llo", append=FALSE, compress=TRUE) + saveRDS(RDatas, file = "sequences_emp.rds") new_names <- names(all_RDatas) vroom_write(as.data.frame(new_names), "names.tsv.gz") - save(gusmap_RDatas, file = "gusmap_RDatas.RData") + saveRDS(gusmap_RDatas, file = "gusmap_RDatas.rds") # Outputs vroom_write(errors, "data1_depths_geno_prob.tsv.gz", num_threads = ~{max_cores}) @@ -225,7 +224,7 @@ task JointReports{ vroom_write(times, "data4_times.tsv.gz", num_threads = ~{max_cores}) system("mkdir EmpiricalReads_results") - system("mv gusmap_RDatas.RData sequences_emp.llo data1_depths_geno_prob.tsv.gz data2_maps.tsv.gz data3_filters.tsv.gz data4_times.tsv.gz names.tsv.gz EmpiricalReads_results") + system("mv gusmap_RDatas.rds sequences_emp.rds data1_depths_geno_prob.tsv.gz data2_maps.tsv.gz data3_filters.tsv.gz data4_times.tsv.gz names.tsv.gz EmpiricalReads_results") system("tar -czvf EmpiricalReads_results.tar.gz EmpiricalReads_results") RSCRIPT @@ -275,7 +274,6 @@ task JointReportsSimu { library(tidyr) library(stringr) library(vroom) - library(largeList) library(vcfR) SNPCaller <- str_split("~{sep=";" SNPCaller}", ";", simplify = T) @@ -296,7 +294,7 @@ task JointReportsSimu { list_files <- untar(files[[i]][[j]], exdir = path_dir, list = T) system(paste0("mv ",path_dir, "/",list_files[1], "*_map_report.tsv.gz ", path_dir, "/maps")) system(paste0("mv ",path_dir, "/",list_files[1], "*_times_report.tsv.gz ", path_dir, "/times")) - system(paste0("mv ",path_dir, "/",list_files[1], "*.RData ", path_dir, "/RDatas")) + system(paste0("mv ",path_dir, "/",list_files[1], "*.rds ", path_dir, "/RDatas")) if(!grepl("gusmap", list_files[1])){ system(paste0("mv ",path_dir, "/",list_files[1], "*_filters_report.tsv.gz ", path_dir, "/filters")) system(paste0("mv ",path_dir, "/",list_files[1], "*_errors_report.tsv.gz ", path_dir, "/errors")) @@ -361,11 +359,11 @@ task JointReportsSimu { class(RDatas[[i]]) <- "list" } - saveList(RDatas, file = "data6_RDatas.llo", append=FALSE, compress=TRUE) + saveRDS(RDatas, file = "data6_RDatas.rds") new_names <- names(all_RDatas) vroom_write(as.data.frame(new_names), "names.tsv.gz") - save(gusmap_RDatas, file = "gusmap_RDatas.RData") + saveRDS(gusmap_RDatas, file = "gusmap_RDatas.rds") # Outputs vroom_write(tsvs[[3]], "data1_depths_geno_prob.tsv.gz", num_threads = ~{max_cores}) @@ -401,8 +399,8 @@ task JointReportsSimu { File data3_filters = "data3_filters.tsv.gz" File data4_times = "data4_times.tsv.gz" File data5_SNPCall_efficiency = "data5_SNPCall_efficiency.tsv.gz" - File data6_RDatas = "data6_RDatas.llo" - File data7_gusmap = "gusmap_RDatas.RData" + File data6_RDatas = "data6_RDatas.rds" + File data7_gusmap = "gusmap_RDatas.rds" File data8_names = "names.tsv.gz" File data10_counts = "data10_CountVariants.tsv.gz" } @@ -432,7 +430,6 @@ task JointTablesSimu{ R --vanilla --no-save < 4) cores = 4 else cores = ~{max_cores} - filtered_onemap <- load("~{onemap_obj}") - filtered_onemap <- get(filtered_onemap) - - simu_onemap_obj <- load("~{simu_onemap_obj}") - simu_onemap_obj <- get(simu_onemap_obj) + filtered_onemap <- readRDS("~{onemap_obj}") + + simu_onemap_obj <- readRDS("~{simu_onemap_obj}") + ref_alt_alleles <- read.table("~{ref_alt_alleles}") simulated_phases <- read.table("~{simulated_phases}") @@ -244,7 +242,7 @@ task MapsReport { RDatas_joint[[2]] <- info_correct[[1]] names(RDatas_joint) <- c("map_~{seed}_~{depth}_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_TRUE", "map_~{seed}_~{depth}_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_FALSE") - save(RDatas_joint, file= "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_~{seed}_~{depth}.RData") + saveRDS(RDatas_joint, file= "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_~{seed}_~{depth}.rds") # Joint times data.frames times_temp <- data.frame(seed = ~{seed}, depth = ~{depth}, SNPCall = "~{SNPCall_program}", @@ -279,7 +277,7 @@ task MapsReport { output { File maps_report = "~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_~{seed}_~{depth}_map_report.tsv.gz" - File maps_RData = "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_~{seed}_~{depth}.RData" + File maps_RData = "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_~{seed}_~{depth}.rds" File times = "~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_~{seed}_~{depth}_times_report.tsv.gz" } } @@ -306,12 +304,10 @@ task ErrorsReport { library(onemap) library(tidyverse) - temp <- load("~{onemap_obj}") - df <- get(temp) - - temp <- load("~{vcfR_obj}") - vcf <- get(temp) - + df <- readRDS("~{onemap_obj}") + + vcf <- readRDS("~{vcfR_obj}") + p <- create_depths_profile(onemap.obj = df, vcfR.object = vcf, parent1 = "P1", parent2 = "P2", vcf.par = "AD",recovering = FALSE, GTfrom = "onemap", alpha=0.1, @@ -321,8 +317,7 @@ task ErrorsReport { df <- cbind(seed = "~{seed}" , depth = "~{depth}", SNPCall = "~{SNPCall_program}", CountsFrom = "~{CountsFrom}", GenoCall="~{GenotypeCall_program}", df) - simu <- load("~{simu_vcfR}") - vcf_simu <- get(simu) + vcf_simu <- readRDS("~{simu_vcfR}") gt.simu <- vcf_simu@gt[,-1] gt.simu <- as.data.frame(cbind(mks = vcf_simu@fix[,3], gt.simu)) @@ -393,12 +388,10 @@ task CheckDepths { R --vanilla --no-save < 4) cores = 4 else cores = ~{max_cores} times_temp <- system.time(df <- create_map_report_emp(input.seq = sequence, CountsFrom = "~{CountsFrom}", @@ -465,7 +457,7 @@ task MapsReportEmp { vroom::vroom_write(df[[2]], "~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_map_report.tsv.gz", num_threads = ~{max_cores}) map_out <- df[[1]] - save(map_out, file = "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}.RData") + saveRDS(map_out, file = "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}.rds") times <- data.frame(SNPCall = "~{SNPCall_program}", CountsFrom = "~{CountsFrom}", @@ -498,7 +490,7 @@ task MapsReportEmp { output { File maps_report = "~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_map_report.tsv.gz" - File maps_RData = "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}.RData" + File maps_RData = "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}.rds" File times = "~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_times_report.tsv.gz" } } @@ -637,7 +629,7 @@ task SetProbs { } vcf <- read.vcfR("~{vcf_file}") - save(vcf, file = "vcfR.RData") + saveRDS(vcf, file = "vcfR.rds") if("~{multiallelics}") only_biallelic = FALSE else only_biallelic = TRUE @@ -698,8 +690,8 @@ task SetProbs { for(i in 1:length(probs_onemap_obj)){ probs_onemap <- probs_onemap_obj[[i]] - save(probs_onemap, file= paste0("probs_onemap_", - names(probs_onemap_obj)[i], ".RData")) + saveRDS(probs_onemap, file= paste0("probs_onemap_", + names(probs_onemap_obj)[i], ".rds")) } cat(names(probs_onemap_obj)) @@ -733,9 +725,9 @@ task SetProbs { } output { - Array[File] probs_onemap_obj = glob("probs_onemap_*.RData") + Array[File] probs_onemap_obj = glob("probs_onemap_*.rds") Array[String] probs_onemap_obj_names = read_lines("names.txt") - File vcfR_obj = "vcfR.RData" + File vcfR_obj = "vcfR.rds" } } @@ -770,7 +762,7 @@ task SetProbsDefault { } if(as.logical("~{mchap}") & "~{SNPCall_program}" == "gatk") vcf <- read.vcfR("~{multiallelics_mchap}") else vcf <- read.vcfR("~{vcf_file}") - save(vcf, file = "vcfR.RData") + saveRDS(vcf, file = "vcfR.rds") if("~{multiallelics}") only_biallelic = FALSE else only_biallelic = TRUE @@ -798,9 +790,9 @@ task SetProbsDefault { globalerror_onemap_obj <- create_probs(input.obj = onemap.obj, global_error = 0.05) default_onemap_obj <- create_probs(input.obj = onemap.obj, global_error = 10^(-5)) - save(default_onemap_obj, file="default_onemap_obj.RData") - save(probs_onemap_obj, file="probs_onemap_obj.RData") - save(globalerror_onemap_obj, file="globalerror_onemap_obj.RData") + saveRDS(default_onemap_obj, file="default_onemap_obj.rds") + saveRDS(probs_onemap_obj, file="probs_onemap_obj.rds") + saveRDS(globalerror_onemap_obj, file="globalerror_onemap_obj.rds") RSCRIPT @@ -825,10 +817,10 @@ task SetProbsDefault { } output { - File probs_onemap_obj = "probs_onemap_obj.RData" - File globalerror_onemap_obj = "globalerror_onemap_obj.RData" - File default_onemap_obj = "default_onemap_obj.RData" - File vcfR_obj = "vcfR.RData" + File probs_onemap_obj = "probs_onemap_obj.rds" + File globalerror_onemap_obj = "globalerror_onemap_obj.rds" + File default_onemap_obj = "default_onemap_obj.rds" + File vcfR_obj = "vcfR.rds" } } From ded083d6e64625ca92eee9bd957ed3cd040d9943 Mon Sep 17 00:00:00 2001 From: cristianetaniguti Date: Mon, 4 Dec 2023 10:03:23 -0600 Subject: [PATCH 8/9] update version and solve #107 and #106 --- README.md | 2 +- pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md | 7 +++++++ .../EmpiricalReads2Map/EmpiricalReads2Map.changelog.md | 6 ++++++ .../EmpiricalSNPCalling/EmpiricalSNPCalling.changelog.md | 5 +++++ tasks/tassel.wdl | 8 ++++++-- 5 files changed, 25 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 43be2ad..0c234e3 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ The Reads2Map workflows perform the SNP and genotype/dosage calling for your com Multiple systems are available to run WDL workflows such as Cromwell, miniWDL, and dxWDL. See further information in the [openwdl documentation](https://github.com/openwdl/wdl#execution-engines). -In addition, we also suggest two wrappers: [cromwell-cli](https://github.com/lmtani/cromwell-cli) and [Caper](https://github.com/ENCODE-DCC/caper). Here is a tutorial on how to setup these tools and one example running the EmpiricalReads2Map: +In addition, we also suggest two wrappers: [pumbaa](https://github.com/lmtani/pumbaa) and [Caper](https://github.com/ENCODE-DCC/caper). Here is a tutorial on how to setup these tools and one example running the EmpiricalReads2Map: * [Setup and run Reads2Map workflows](https://cristianetaniguti.github.io/Tutorials/Reads2Map/Setup_and_run_Reads2Map_workflows.html) diff --git a/pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md b/pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md index 2580c16..1447e70 100644 --- a/pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md +++ b/pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md @@ -1,3 +1,10 @@ +# 1.3.0 + +* Add MAPpoly new functions framework_map and update_framework_map +* Update tests +* Polyploid analysis output compatible with Reads2MapApp v0.0.1 +* Remove LargeList deprecated package as dependency + # 1.2.5 * more flexibility to choose the probability to be used in the HMM: diff --git a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md index ae5b43a..d14db82 100644 --- a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md +++ b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md @@ -1,3 +1,9 @@ +# 1.5.0 + +* Add MAPpoly new functions framework_map and update_framework_map +* Polyploid analysis output compatible with Reads2MapApp v0.0.1 +* Remove LargeList deprecated package as dependency + # 1.4.3 * Update example for pair-end reads inputs diff --git a/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.changelog.md b/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.changelog.md index 07e2c36..cca2626 100644 --- a/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.changelog.md +++ b/pipelines/EmpiricalSNPCalling/EmpiricalSNPCalling.changelog.md @@ -1,3 +1,8 @@ +# 1.4.3 + +* Adapt tassel and stacks tasks also for polyploids +* Update tests + # 1.4.2 * Update example for pair-end reads inputs diff --git a/tasks/tassel.wdl b/tasks/tassel.wdl index deb8a99..baf4c0c 100644 --- a/tasks/tassel.wdl +++ b/tasks/tassel.wdl @@ -167,7 +167,11 @@ task BarcodeFaker { file_names <- "~{sep=',' fastq}" file_names <- unlist(strsplit(file_names, ",")) is_gz <- basename(file_names[1]) - if(grepl("gz", is_gz)) system(paste("gunzip", file_names)) + if(grepl(".gz", is_gz)) { + for(i in 1:length(file_names)){ + system(paste("gunzip", file_names[i])) + } + } dir_name <- dirname(file_names[1]) sample_names <- "~{sep=',' FullSampleName}" @@ -188,7 +192,7 @@ task BarcodeFaker { # Slurm job_name: "BarcodeFaker" mem:"2G" - time: 4 + time: 24 } meta { From 7fa8a825aed470a193dd4e3b81ca40e7a9d4e5ba Mon Sep 17 00:00:00 2001 From: cristianetaniguti Date: Mon, 4 Dec 2023 10:08:24 -0600 Subject: [PATCH 9/9] fix changelog --- pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md index d14db82..b450238 100644 --- a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md +++ b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md @@ -1,5 +1,7 @@ # 1.5.0 +* Adapt tassel and stacks tasks also for polyploids +* Update tests * Add MAPpoly new functions framework_map and update_framework_map * Polyploid analysis output compatible with Reads2MapApp v0.0.1 * Remove LargeList deprecated package as dependency