diff --git a/pipelines/EmpiricalMaps/EmpiricalMaps.wdl b/pipelines/EmpiricalMaps/EmpiricalMaps.wdl index da6d89e..6a90acd 100644 --- a/pipelines/EmpiricalMaps/EmpiricalMaps.wdl +++ b/pipelines/EmpiricalMaps/EmpiricalMaps.wdl @@ -27,6 +27,8 @@ workflow Maps { String? filters Int max_cores Int ploidy + Float? prob_thres + String? filt_segr } if (defined(filters)) { @@ -155,7 +157,9 @@ workflow Maps { parent1 = dataset.parent1, parent2 = dataset.parent2, max_cores = max_cores, - ploidy = ploidy + ploidy = ploidy, + prob_thres = prob_thres, + filt_segr = filt_segr } call mappoly_sub.MappolyMapsEmp as polyradPolyMaps { @@ -168,7 +172,9 @@ workflow Maps { parent1 = dataset.parent1, parent2 = dataset.parent2, max_cores = max_cores, - ploidy = ploidy + ploidy = ploidy, + prob_thres = prob_thres, + filt_segr = filt_segr } call mappoly_sub.MappolyMapsEmp as supermassaPolyMaps { @@ -181,7 +187,9 @@ workflow Maps { parent1 = dataset.parent1, parent2 = dataset.parent2, max_cores = max_cores, - ploidy = ploidy + ploidy = ploidy, + prob_thres = prob_thres, + filt_segr = filt_segr } if(vcfs_counts_source[idx] != "bam"){ @@ -194,7 +202,9 @@ workflow Maps { parent1 = dataset.parent1, parent2 = dataset.parent2, max_cores = max_cores, - ploidy = ploidy + ploidy = ploidy, + prob_thres = prob_thres, + filt_segr = filt_segr } } } @@ -216,14 +226,13 @@ workflow Maps { } if(ploidy > 2){ - Array[File] snpcaller_results_poly = select_all(MappolyReport.results) - + call reports.JointReportsPoly { input: - SNPCaller = snpcaller_results_poly, - updog = updogPolyMaps.tar_gz_report, - polyrad = polyradPolyMaps.tar_gz_report, - supermassa = supermassaPolyMaps.tar_gz_report + SNPCaller = select_all(MappolyReport.results), + updog = select_all(updogPolyMaps.tar_gz_report), + polyrad = select_all(polyradPolyMaps.tar_gz_report), + supermassa = select_all(supermassaPolyMaps.tar_gz_report) } } diff --git a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.wdl b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.wdl index 1fefaeb..dd3f408 100644 --- a/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.wdl +++ b/pipelines/EmpiricalReads2Map/EmpiricalReads2Map.wdl @@ -24,6 +24,8 @@ workflow EmpiricalReads { Int ploidy = 2 Int n_chrom String? filters + Float? prob_thres + String? filt_segr } call snpcalling.SNPCalling { @@ -54,7 +56,9 @@ workflow EmpiricalReads { filters = filters, max_cores = max_cores, replaceADbyMissing = replaceADbyMissing, - ploidy = ploidy + ploidy = ploidy, + prob_thres = prob_thres, + filt_segr = filt_segr } output { diff --git a/subworkflows/mappoly_maps_empirical.wdl b/subworkflows/mappoly_maps_empirical.wdl index 360c5f8..5b02c5a 100644 --- a/subworkflows/mappoly_maps_empirical.wdl +++ b/subworkflows/mappoly_maps_empirical.wdl @@ -13,8 +13,10 @@ workflow MappolyMapsEmp { String cross String parent1 String parent2 + Float? prob_thres Int max_cores Int ploidy + String? filt_segr } call utilsR.ReGenotyping { @@ -37,7 +39,9 @@ workflow MappolyMapsEmp { SNPCall_program = SNPCall_program, CountsFrom = CountsFrom, max_cores = max_cores, - ploidy = ploidy + ploidy = ploidy, + prob_thres = prob_thres, + filt_segr = filt_segr } output { diff --git a/tasks/JointReports.wdl b/tasks/JointReports.wdl index 91d88d8..2d7a69a 100644 --- a/tasks/JointReports.wdl +++ b/tasks/JointReports.wdl @@ -387,10 +387,10 @@ task JointTablesSimu{ task JointReportsPoly{ input{ - Array[File?] SNPCaller - Array[File?] updog - Array[File?] polyrad - Array[File?] supermassa + Array[File] SNPCaller + Array[File] updog + Array[File] polyrad + Array[File] supermassa } Int disk_size = ceil(size(SNPCaller, "GiB") * 1.5 + size(updog, "GiB") * 1.5 + size(polyrad, "GiB") * 1.5 + size(supermassa, "GiB") * 1.5) @@ -398,32 +398,32 @@ task JointReportsPoly{ command <<< - snpcaller=(~{sep=" " SNPCaller}) - updog=(~{sep=" " updog}) - polyrad=(~{sep=" " polyrad}) - supermassa=(~{sep=" " supermassa}) + snpcaller=( ~{sep=" " SNPCaller} ) + updog=( ~{sep=" " updog} ) + polyrad=( ~{sep=" " polyrad} ) + supermassa=( ~{sep=" " supermassa} ) mkdir results_all - for index in ${!snpcaller[*]}; + for index in ${!snpcaller[*]}; do tar -xvf ${snpcaller[$index]} mv results/* results_all rm -r results done - for index in ${!updog[*]}; + for index in ${!updog[*]}; do tar -xvf ${updog[$index]} mv results/* results_all rm -r results done - for index in ${!polyrad[*]}; + for index in ${!polyrad[*]}; do tar -xvf ${polyrad[$index]} mv results/* results_all rm -r results done - for index in ${!supermassa[*]}; + for index in ${!supermassa[*]}; do tar -xvf ${supermassa[$index]} mv results/* results_all rm -r results diff --git a/tasks/freebayes.wdl b/tasks/freebayes.wdl index 9e71dd2..868f3d4 100644 --- a/tasks/freebayes.wdl +++ b/tasks/freebayes.wdl @@ -12,7 +12,7 @@ task RunFreebayes { } Int disk_size = ceil(size(reference, "GiB") + size(bam, "GiB") + 50) - Int memory_size = ceil(size(bam, "MiB") * 25 * max_cores + 10000) + Int memory_size = ceil(size(bam, "MiB") * 4 * max_cores + 10000) command <<< diff --git a/tasks/mappoly.wdl b/tasks/mappoly.wdl index 3f528a2..8d61bed 100644 --- a/tasks/mappoly.wdl +++ b/tasks/mappoly.wdl @@ -8,8 +8,10 @@ task MappolyReport { String CountsFrom String parent1 String parent2 + Float prob_thres = 0.8 Int max_cores Int ploidy + String filt_segr = "TRUE" } Int disk_size = ceil(size(vcf_file, "GiB") * 2) @@ -25,7 +27,7 @@ task MappolyReport { parent.2 = "~{parent2}", verbose = FALSE, read.geno.prob = TRUE, - prob.thres = 0.8, ploidy = ~{ploidy}) + prob.thres = ~{prob_thres}, ploidy = ~{ploidy}) png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_raw_data.png")) plot(dat) @@ -34,12 +36,16 @@ task MappolyReport { dat <- filter_missing(input.data = dat, type = "marker", filter.thres = 0.25, inter = FALSE) - pval.bonf <- 0.05/dat[[3]] - mrks.chi.filt <- filter_segregation(dat, - chisq.pval.thres = pval.bonf, - inter = FALSE) + if("~{filt_segr}"){ + pval.bonf <- 0.05/dat[[3]] + mrks.chi.filt <- filter_segregation(dat, + chisq.pval.thres = pval.bonf, + inter = FALSE) - seq.init <- make_seq_mappoly(mrks.chi.filt) + seq.init <- make_seq_mappoly(mrks.chi.filt) + } else { + seq.init <- make_seq_mappoly(dat, "all") + } png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_","filters.png")) plot(seq.init) @@ -64,60 +70,62 @@ task MappolyReport { plot(mat, ord = s.o[[3]]) dev.off() - est.map <- est_rf_hmm_sequential(input.seq = s.o, - start.set = 5, - thres.twopt = 10, - thres.hmm = 50, - extend.tail = 30, - twopt = all.rf.pairwise, - verbose = F, - phase.number.limit = 20, - sub.map.size.diff.limit = 5) - - map.err <- est_full_hmm_with_global_error(input.map = est.map, error = 0.05) - map.prob <- est_full_hmm_with_prior_prob(input.map = est.map, dat.prob = dat) - - png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_no_error_cMbyMb.png")) - plot_genome_vs_map(est.map, same.ch.lg = TRUE) - dev.off() + # est.map <- est_rf_hmm_sequential(input.seq = s.o, + # start.set = 5, + # thres.twopt = 10, + # thres.hmm = 50, + # extend.tail = 30, + # twopt = all.rf.pairwise, + # verbose = F, + # phase.number.limit = 20, + # sub.map.size.diff.limit = 5) - png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"global_error_cMbyMb.png")) - plot_genome_vs_map(map.err, same.ch.lg = TRUE) - dev.off() + # map.err <- est_full_hmm_with_global_error(input.map = est.map, error = 0.05) + # map.prob <- est_full_hmm_with_prior_prob(input.map = est.map, dat.prob = dat) - png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_probs_cMbyMb.png")) - plot_genome_vs_map(map.prob, same.ch.lg = TRUE) - dev.off() + # png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_no_error_cMbyMb.png")) + # plot_genome_vs_map(est.map, same.ch.lg = TRUE) + # dev.off() - summary <- summary_maps(list(est.map, map.err, map.prob)) - summary <- cbind(method = c("no_error", "global_error", "probs", "-"), summary) + # png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"global_error_cMbyMb.png")) + # plot_genome_vs_map(map.err, same.ch.lg = TRUE) + # dev.off() - write.csv(summary, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_map_summary.csv")) + # png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_probs_cMbyMb.png")) + # plot_genome_vs_map(map.prob, same.ch.lg = TRUE) + # dev.off() - export_map_list(est.map, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_no_error_","map_file.csv")) - export_map_list(map.err, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_global_error_","map_file.csv")) - export_map_list(map.prob, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_probs_","map_file.csv")) + # summary <- summary_maps(list(est.map, map.err, map.prob)) + # summary <- cbind(method = c("no_error", "global_error", "probs", "-"), summary) - png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_map_draw.png")) - plot_map_list(list(default = est.map, - global = map.err, - probs = map.prob), col = "ggstyle") - dev.off() + # write.csv(summary, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_map_summary.csv")) - genoprob <- calc_genoprob_error(input.map = est.map, error = 0) - genoprob.err <- calc_genoprob_error(input.map = map.err, error = 0.05) - genoprob.prob <- calc_genoprob_dist(input.map = map.prob, dat.prob = dat) + # export_map_list(est.map, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_no_error_","map_file.csv")) + # export_map_list(map.err, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_global_error_","map_file.csv")) + # export_map_list(map.prob, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_probs_","map_file.csv")) - homoprobs = calc_homologprob(genoprob) - homoprobs.err = calc_homologprob(genoprob.err) - homoprobs.prob = calc_homologprob(genoprob.prob) + # png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_map_draw.png")) + # plot_map_list(list(default = est.map, + # global = map.err, + # probs = map.prob), col = "ggstyle") + # dev.off() - save(homoprobs, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.RData")) - save(homoprobs.err, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.err.RData")) - save(homoprobs.prob, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.prob.RData")) + # genoprob <- calc_genoprob_error(input.map = est.map, error = 0) + # genoprob.err <- calc_genoprob_error(input.map = map.err, error = 0.05) + # genoprob.prob <- calc_genoprob_dist(input.map = map.prob, dat.prob = dat) + + # homoprobs = calc_homologprob(genoprob) + # homoprobs.err = calc_homologprob(genoprob.err) + # homoprobs.prob = calc_homologprob(genoprob.prob) + + # save(homoprobs, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.RData")) + # save(homoprobs.err, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.err.RData")) + # save(homoprobs.prob, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.prob.RData")) system("mkdir results") - system("mv *.png *.RData *csv results") + #system("mv *.png *.RData *csv results") + system("mv *.png results") + system(paste0("tar -czvf ", "~{SNPCall_program}", "_", "~{GenotypeCall_program}", "_", "~{CountsFrom}","_results.tar.gz results")) RSCRIPT diff --git a/tests/pipelines/EmpiricalMaps/polyploid/inputs.json b/tests/pipelines/EmpiricalMaps/polyploid/inputs.json index 98aee41..a60df37 100644 --- a/tests/pipelines/EmpiricalMaps/polyploid/inputs.json +++ b/tests/pipelines/EmpiricalMaps/polyploid/inputs.json @@ -1,20 +1,18 @@ { - "Maps.dataset": { - "parent2": "GeorgeVancouver", - "name": "rose_sub", - "parent1": "MordenBlush", - "chromosome": "Chr01", - "cross": "F1", - "multiallelics": false - }, - "Maps.max_cores": 20, - "Maps.ploidy": 4, - "Maps.filters": "-r Chr01", - "Maps.gatk_mchap": "FALSE", - "Maps.vcfs_counts_source": ["vcf","vcf"], - "Maps.vcfs": ["/home/rose_lab/freebayes_tetraploid/gatk_norm.vcf.gz", - "/home/rose_lab/freebayes_tetraploid/freebayes_norm.vcf.gz"], - "Maps.vcfs_software": ["gatk", "freebayes"], - "Maps.filter_noninfo": true, - "Maps.replaceADbyMissing": "TRUE" - } + "Maps.ploidy": "4", + "Maps.dataset": { + "parent2": "P1", + "name": "roses", + "parent1": "P2", + "chromosome": "Chr04", + "cross": "F1", + "multiallelics": "false" + }, + "Maps.max_cores": "2", + "Maps.gatk_mchap": "false", + "Maps.vcfs_counts_source": ["vcf"], + "Maps.vcfs_software": ["gatk"], + "Maps.filter_noninfo": "true", + "Maps.vcfs": ["gatk_vcf_norm.vcf.gz"], + "Maps.replaceADbyMissing": "TRUE" +}