Skip to content

Commit

Permalink
remove map hmm
Browse files Browse the repository at this point in the history
  • Loading branch information
Cristianetaniguti committed Mar 9, 2023
1 parent ead0889 commit 9b3d1d0
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 93 deletions.
29 changes: 19 additions & 10 deletions pipelines/EmpiricalMaps/EmpiricalMaps.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ workflow Maps {
String? filters
Int max_cores
Int ploidy
Float? prob_thres
String? filt_segr
}

if (defined(filters)) {
Expand Down Expand Up @@ -155,7 +157,9 @@ workflow Maps {
parent1 = dataset.parent1,
parent2 = dataset.parent2,
max_cores = max_cores,
ploidy = ploidy
ploidy = ploidy,
prob_thres = prob_thres,
filt_segr = filt_segr
}

call mappoly_sub.MappolyMapsEmp as polyradPolyMaps {
Expand All @@ -168,7 +172,9 @@ workflow Maps {
parent1 = dataset.parent1,
parent2 = dataset.parent2,
max_cores = max_cores,
ploidy = ploidy
ploidy = ploidy,
prob_thres = prob_thres,
filt_segr = filt_segr
}

call mappoly_sub.MappolyMapsEmp as supermassaPolyMaps {
Expand All @@ -181,7 +187,9 @@ workflow Maps {
parent1 = dataset.parent1,
parent2 = dataset.parent2,
max_cores = max_cores,
ploidy = ploidy
ploidy = ploidy,
prob_thres = prob_thres,
filt_segr = filt_segr
}

if(vcfs_counts_source[idx] != "bam"){
Expand All @@ -194,7 +202,9 @@ workflow Maps {
parent1 = dataset.parent1,
parent2 = dataset.parent2,
max_cores = max_cores,
ploidy = ploidy
ploidy = ploidy,
prob_thres = prob_thres,
filt_segr = filt_segr
}
}
}
Expand All @@ -216,14 +226,13 @@ workflow Maps {
}

if(ploidy > 2){
Array[File] snpcaller_results_poly = select_all(MappolyReport.results)


call reports.JointReportsPoly {
input:
SNPCaller = snpcaller_results_poly,
updog = updogPolyMaps.tar_gz_report,
polyrad = polyradPolyMaps.tar_gz_report,
supermassa = supermassaPolyMaps.tar_gz_report
SNPCaller = select_all(MappolyReport.results),
updog = select_all(updogPolyMaps.tar_gz_report),
polyrad = select_all(polyradPolyMaps.tar_gz_report),
supermassa = select_all(supermassaPolyMaps.tar_gz_report)
}
}

Expand Down
6 changes: 5 additions & 1 deletion pipelines/EmpiricalReads2Map/EmpiricalReads2Map.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ workflow EmpiricalReads {
Int ploidy = 2
Int n_chrom
String? filters
Float? prob_thres
String? filt_segr
}

call snpcalling.SNPCalling {
Expand Down Expand Up @@ -54,7 +56,9 @@ workflow EmpiricalReads {
filters = filters,
max_cores = max_cores,
replaceADbyMissing = replaceADbyMissing,
ploidy = ploidy
ploidy = ploidy,
prob_thres = prob_thres,
filt_segr = filt_segr
}

output {
Expand Down
6 changes: 5 additions & 1 deletion subworkflows/mappoly_maps_empirical.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ workflow MappolyMapsEmp {
String cross
String parent1
String parent2
Float? prob_thres
Int max_cores
Int ploidy
String? filt_segr
}

call utilsR.ReGenotyping {
Expand All @@ -37,7 +39,9 @@ workflow MappolyMapsEmp {
SNPCall_program = SNPCall_program,
CountsFrom = CountsFrom,
max_cores = max_cores,
ploidy = ploidy
ploidy = ploidy,
prob_thres = prob_thres,
filt_segr = filt_segr
}

output {
Expand Down
24 changes: 12 additions & 12 deletions tasks/JointReports.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -387,43 +387,43 @@ task JointTablesSimu{
task JointReportsPoly{
input{
Array[File?] SNPCaller
Array[File?] updog
Array[File?] polyrad
Array[File?] supermassa
Array[File] SNPCaller
Array[File] updog
Array[File] polyrad
Array[File] supermassa
}
Int disk_size = ceil(size(SNPCaller, "GiB") * 1.5 + size(updog, "GiB") * 1.5 + size(polyrad, "GiB") * 1.5 + size(supermassa, "GiB") * 1.5)
Int memory_size = 4000
command <<<
snpcaller=(~{sep=" " SNPCaller})
updog=(~{sep=" " updog})
polyrad=(~{sep=" " polyrad})
supermassa=(~{sep=" " supermassa})
snpcaller=( ~{sep=" " SNPCaller} )
updog=( ~{sep=" " updog} )
polyrad=( ~{sep=" " polyrad} )
supermassa=( ~{sep=" " supermassa} )
mkdir results_all
for index in ${!snpcaller[*]};
for index in ${!snpcaller[*]}; do
tar -xvf ${snpcaller[$index]}
mv results/* results_all
rm -r results
done
for index in ${!updog[*]};
for index in ${!updog[*]}; do
tar -xvf ${updog[$index]}
mv results/* results_all
rm -r results
done
for index in ${!polyrad[*]};
for index in ${!polyrad[*]}; do
tar -xvf ${polyrad[$index]}
mv results/* results_all
rm -r results
done
for index in ${!supermassa[*]};
for index in ${!supermassa[*]}; do
tar -xvf ${supermassa[$index]}
mv results/* results_all
rm -r results
Expand Down
2 changes: 1 addition & 1 deletion tasks/freebayes.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ task RunFreebayes {
}

Int disk_size = ceil(size(reference, "GiB") + size(bam, "GiB") + 50)
Int memory_size = ceil(size(bam, "MiB") * 25 * max_cores + 10000)
Int memory_size = ceil(size(bam, "MiB") * 4 * max_cores + 10000)

command <<<

Expand Down
106 changes: 57 additions & 49 deletions tasks/mappoly.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ task MappolyReport {
String CountsFrom
String parent1
String parent2
Float prob_thres = 0.8
Int max_cores
Int ploidy
String filt_segr = "TRUE"
}

Int disk_size = ceil(size(vcf_file, "GiB") * 2)
Expand All @@ -25,7 +27,7 @@ task MappolyReport {
parent.2 = "~{parent2}",
verbose = FALSE,
read.geno.prob = TRUE,
prob.thres = 0.8, ploidy = ~{ploidy})
prob.thres = ~{prob_thres}, ploidy = ~{ploidy})
png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_raw_data.png"))
plot(dat)
Expand All @@ -34,12 +36,16 @@ task MappolyReport {
dat <- filter_missing(input.data = dat, type = "marker",
filter.thres = 0.25, inter = FALSE)
pval.bonf <- 0.05/dat[[3]]
mrks.chi.filt <- filter_segregation(dat,
chisq.pval.thres = pval.bonf,
inter = FALSE)
if("~{filt_segr}"){
pval.bonf <- 0.05/dat[[3]]
mrks.chi.filt <- filter_segregation(dat,
chisq.pval.thres = pval.bonf,
inter = FALSE)
seq.init <- make_seq_mappoly(mrks.chi.filt)
seq.init <- make_seq_mappoly(mrks.chi.filt)
} else {
seq.init <- make_seq_mappoly(dat, "all")
}
png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_","filters.png"))
plot(seq.init)
Expand All @@ -64,60 +70,62 @@ task MappolyReport {
plot(mat, ord = s.o[[3]])
dev.off()
est.map <- est_rf_hmm_sequential(input.seq = s.o,
start.set = 5,
thres.twopt = 10,
thres.hmm = 50,
extend.tail = 30,
twopt = all.rf.pairwise,
verbose = F,
phase.number.limit = 20,
sub.map.size.diff.limit = 5)
map.err <- est_full_hmm_with_global_error(input.map = est.map, error = 0.05)
map.prob <- est_full_hmm_with_prior_prob(input.map = est.map, dat.prob = dat)
png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_no_error_cMbyMb.png"))
plot_genome_vs_map(est.map, same.ch.lg = TRUE)
dev.off()
# est.map <- est_rf_hmm_sequential(input.seq = s.o,
# start.set = 5,
# thres.twopt = 10,
# thres.hmm = 50,
# extend.tail = 30,
# twopt = all.rf.pairwise,
# verbose = F,
# phase.number.limit = 20,
# sub.map.size.diff.limit = 5)
png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"global_error_cMbyMb.png"))
plot_genome_vs_map(map.err, same.ch.lg = TRUE)
dev.off()
# map.err <- est_full_hmm_with_global_error(input.map = est.map, error = 0.05)
# map.prob <- est_full_hmm_with_prior_prob(input.map = est.map, dat.prob = dat)
png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_probs_cMbyMb.png"))
plot_genome_vs_map(map.prob, same.ch.lg = TRUE)
dev.off()
# png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_no_error_cMbyMb.png"))
# plot_genome_vs_map(est.map, same.ch.lg = TRUE)
# dev.off()
summary <- summary_maps(list(est.map, map.err, map.prob))
summary <- cbind(method = c("no_error", "global_error", "probs", "-"), summary)
# png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"global_error_cMbyMb.png"))
# plot_genome_vs_map(map.err, same.ch.lg = TRUE)
# dev.off()
write.csv(summary, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_map_summary.csv"))
# png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_probs_cMbyMb.png"))
# plot_genome_vs_map(map.prob, same.ch.lg = TRUE)
# dev.off()
export_map_list(est.map, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_no_error_","map_file.csv"))
export_map_list(map.err, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_global_error_","map_file.csv"))
export_map_list(map.prob, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_probs_","map_file.csv"))
# summary <- summary_maps(list(est.map, map.err, map.prob))
# summary <- cbind(method = c("no_error", "global_error", "probs", "-"), summary)
png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_map_draw.png"))
plot_map_list(list(default = est.map,
global = map.err,
probs = map.prob), col = "ggstyle")
dev.off()
# write.csv(summary, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_map_summary.csv"))
genoprob <- calc_genoprob_error(input.map = est.map, error = 0)
genoprob.err <- calc_genoprob_error(input.map = map.err, error = 0.05)
genoprob.prob <- calc_genoprob_dist(input.map = map.prob, dat.prob = dat)
# export_map_list(est.map, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_no_error_","map_file.csv"))
# export_map_list(map.err, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_global_error_","map_file.csv"))
# export_map_list(map.prob, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_probs_","map_file.csv"))
homoprobs = calc_homologprob(genoprob)
homoprobs.err = calc_homologprob(genoprob.err)
homoprobs.prob = calc_homologprob(genoprob.prob)
# png(paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"_map_draw.png"))
# plot_map_list(list(default = est.map,
# global = map.err,
# probs = map.prob), col = "ggstyle")
# dev.off()
save(homoprobs, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.RData"))
save(homoprobs.err, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.err.RData"))
save(homoprobs.prob, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.prob.RData"))
# genoprob <- calc_genoprob_error(input.map = est.map, error = 0)
# genoprob.err <- calc_genoprob_error(input.map = map.err, error = 0.05)
# genoprob.prob <- calc_genoprob_dist(input.map = map.prob, dat.prob = dat)
# homoprobs = calc_homologprob(genoprob)
# homoprobs.err = calc_homologprob(genoprob.err)
# homoprobs.prob = calc_homologprob(genoprob.prob)
# save(homoprobs, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.RData"))
# save(homoprobs.err, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.err.RData"))
# save(homoprobs.prob, file = paste0("~{SNPCall_program}", "_","~{GenotypeCall_program}", "_", "~{CountsFrom}" ,"homoprobs.prob.RData"))
system("mkdir results")
system("mv *.png *.RData *csv results")
#system("mv *.png *.RData *csv results")
system("mv *.png results")
system(paste0("tar -czvf ", "~{SNPCall_program}", "_", "~{GenotypeCall_program}", "_", "~{CountsFrom}","_results.tar.gz results"))
RSCRIPT
Expand Down
36 changes: 17 additions & 19 deletions tests/pipelines/EmpiricalMaps/polyploid/inputs.json
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
{
"Maps.dataset": {
"parent2": "GeorgeVancouver",
"name": "rose_sub",
"parent1": "MordenBlush",
"chromosome": "Chr01",
"cross": "F1",
"multiallelics": false
},
"Maps.max_cores": 20,
"Maps.ploidy": 4,
"Maps.filters": "-r Chr01",
"Maps.gatk_mchap": "FALSE",
"Maps.vcfs_counts_source": ["vcf","vcf"],
"Maps.vcfs": ["/home/rose_lab/freebayes_tetraploid/gatk_norm.vcf.gz",
"/home/rose_lab/freebayes_tetraploid/freebayes_norm.vcf.gz"],
"Maps.vcfs_software": ["gatk", "freebayes"],
"Maps.filter_noninfo": true,
"Maps.replaceADbyMissing": "TRUE"
}
"Maps.ploidy": "4",
"Maps.dataset": {
"parent2": "P1",
"name": "roses",
"parent1": "P2",
"chromosome": "Chr04",
"cross": "F1",
"multiallelics": "false"
},
"Maps.max_cores": "2",
"Maps.gatk_mchap": "false",
"Maps.vcfs_counts_source": ["vcf"],
"Maps.vcfs_software": ["gatk"],
"Maps.filter_noninfo": "true",
"Maps.vcfs": ["gatk_vcf_norm.vcf.gz"],
"Maps.replaceADbyMissing": "TRUE"
}

0 comments on commit 9b3d1d0

Please sign in to comment.