Merge pull request #110 from Cristianetaniguti/develop
Develop
Cristianetaniguti authored Dec 5, 2023
2 parents 70c6690 + 1cc9663 commit cf879bb
Showing 27 changed files with 224 additions and 193 deletions.
2 changes: 1 addition & 1 deletion .configurations/cromwell_no_mysql.conf
@@ -1,5 +1,5 @@
backend {
default = SlurmSingularity
default = Local

providers {

3 changes: 2 additions & 1 deletion .dockerfiles/reads2map/Dockerfile
@@ -53,10 +53,11 @@ RUN Rscript -e 'remotes::install_version("gsalib",upgrade="never", version = "2.
RUN Rscript -e 'remotes::install_github("tpbilton/GUSbase", ref = "92119b9c57faa7abeede8236d24a4a8e85fb3df7")'

RUN Rscript -e 'remotes::install_github("tpbilton/GUSMap", ref = "4d7d4057049819d045750d760a45976c8f30dac6")'
RUN Rscript -e 'remotes::install_github("dcgerard/updog", ref="f1")'
RUN Rscript -e 'remotes::install_github("dcgerard/updog")'

RUN Rscript -e 'remotes::install_github("Cristianetaniguti/onemap")'

# Still private
RUN Rscript -e 'remotes::install_github("Cristianetaniguti/simuscopR")'
RUN Rscript -e 'remotes::install_github("Cristianetaniguti/Reads2MapTools")'
RUN Rscript -e 'remotes::install_github("Cristianetaniguti/MAPpoly")'
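For context on the install lines above, a minimal R sketch of the two styles the Dockerfile now mixes: pinning a GitHub package to a commit with `ref` (the GUSMap hash is copied from the lines above) versus tracking the default branch, as the updog line now does. Whether to pin is a reproducibility choice, not something this change dictates.

```r
# Install helper for GitHub packages
install.packages("remotes")

# Pinned install: every image build gets the same GUSMap commit
remotes::install_github("tpbilton/GUSMap",
                        ref = "4d7d4057049819d045750d760a45976c8f30dac6")

# Unpinned install: the image gets whatever the default branch holds at build time
remotes::install_github("dcgerard/updog")
```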
6 changes: 5 additions & 1 deletion README.md
@@ -22,7 +22,7 @@ The Reads2Map workflows perform the SNP and genotype/dosage calling for your com

Multiple systems are available to run WDL workflows such as Cromwell, miniWDL, and dxWDL. See further information in the [openwdl documentation](https://github.com/openwdl/wdl#execution-engines).

In addition, we suggest two wrappers: [cromwell-cli](https://github.com/lmtani/cromwell-cli) and [Caper](https://github.com/ENCODE-DCC/caper). Here is a tutorial on how to set up these tools, with an example running the EmpiricalReads2Map:
In addition, we suggest two wrappers: [pumbaa](https://github.com/lmtani/pumbaa) and [Caper](https://github.com/ENCODE-DCC/caper). Here is a tutorial on how to set up these tools, with an example running the EmpiricalReads2Map:

* [Setup and run Reads2Map workflows](https://cristianetaniguti.github.io/Tutorials/Reads2Map/Setup_and_run_Reads2Map_workflows.html)

@@ -73,6 +73,10 @@ Taniguti, C. H.; Taniguti, L. M.; Amadeu, R. R.; Lau, J.; de Siqueira Gesteira,
- [simuscopR](https://github.com/Cristianetaniguti/simuscopR) in [cristaniguti/reads2map:0.0.1](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Wrap-up R package for SimusCop simulations
- [MAPpoly](https://github.com/mmollina/MAPpoly) in [cristaniguti/reads2map:0.0.5](https://hub.docker.com/repository/docker/cristaniguti/reads2map): Build linkage maps for autopolyploid species

### How to cite

Taniguti, C. H.; Taniguti, L. M.; Amadeu, R. R.; Lau, J.; de Siqueira Gesteira, G.; Oliveira, T. de P.; Ferreira, G. C.; Pereira, G. da S.; Byrne, D.; Mollinari, M.; Riera-Lizarazu, O.; Garcia, A. A. F. Developing best practices for genotyping-by-sequencing analysis in the construction of linkage maps. GigaScience, 12, giad092. https://doi.org/10.1093/gigascience/giad092

### Funding

This work was partially supported by the National Council for Scientific and Technological Development (CNPq - 313269/2021-1); by USDA, National Institute of Food and Agriculture (NIFA), Specialty Crop Research Initiative (SCRI) project “Tools for Genomics Assisted Breeding in Polyploids: Development of a Community Resource” (Award No. 2020-51181-32156); and by the Bill and Melinda Gates Foundation (OPP1213329) project SweetGAINS.
7 changes: 7 additions & 0 deletions pipelines/EmpiricalMaps/EmpiricalMaps.changelog.md
@@ -1,3 +1,10 @@
# 1.3.0

* Add new MAPpoly functions framework_map and update_framework_map
* Update tests
* Polyploid analysis output compatible with Reads2MapApp v0.0.1
* Remove the deprecated LargeList package as a dependency

# 1.2.5

* more flexibility to choose the probability to be used in the HMM:
14 changes: 9 additions & 5 deletions pipelines/EmpiricalMaps/EmpiricalMaps.wdl
@@ -189,7 +189,8 @@ workflow Maps {
max_cores = max_cores,
ploidy = ploidy,
prob_thres = prob_thres,
filt_segr = filt_segr
filt_segr = filt_segr,
global_errors = global_errors
}
}

@@ -206,7 +207,8 @@ workflow Maps {
max_cores = max_cores,
ploidy = ploidy,
prob_thres = prob_thres,
filt_segr = filt_segr
filt_segr = filt_segr,
global_errors = global_errors
}
}

@@ -223,7 +225,8 @@ workflow Maps {
max_cores = max_cores,
ploidy = ploidy,
prob_thres = prob_thres,
filt_segr = filt_segr
filt_segr = filt_segr,
global_errors = global_errors
}
}

@@ -237,9 +240,10 @@ workflow Maps {
parent1 = dataset.parent1,
parent2 = dataset.parent2,
max_cores = max_cores,
ploidy = ploidy,
ploidy = ploidy,
prob_thres = prob_thres,
filt_segr = filt_segr
filt_segr = filt_segr,
global_errors = global_errors
}
}
}
8 changes: 8 additions & 0 deletions pipelines/EmpiricalReads2Map/EmpiricalReads2Map.changelog.md
@@ -1,3 +1,11 @@
# 1.5.0

* Adapt tassel and stacks tasks for polyploids as well
* Update tests
* Add new MAPpoly functions framework_map and update_framework_map
* Polyploid analysis output compatible with Reads2MapApp v0.0.1
* Remove the deprecated LargeList package as a dependency

# 1.4.3

* Update example for paired-end read inputs
@@ -1,3 +1,8 @@
# 1.4.3

* Adapt tassel and stacks tasks for polyploids as well
* Update tests

# 1.4.2

* Update example for paired-end read inputs
4 changes: 3 additions & 1 deletion subworkflows/mappoly_maps_empirical.wdl
@@ -17,6 +17,7 @@ workflow MappolyMapsEmp {
Int max_cores
Int ploidy
String? filt_segr
Array[String] global_errors
}

call utilsR.ReGenotyping {
@@ -43,7 +44,8 @@
max_cores = max_cores,
ploidy = ploidy,
prob_thres = prob_thres,
filt_segr = filt_segr
filt_segr = filt_segr,
global_errors = global_errors
}

output {
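The new `global_errors` input is an `Array[String]`, so each value presumably yields one additional map estimate downstream. A minimal R sketch of that fan-out; `build_map_with_global_error()` is a purely hypothetical placeholder for the task's real map-building call, not a function from onemap, MAPpoly, or this repository.

```r
# Hypothetical illustration of iterating over the workflow's global_errors array.
global_errors <- c("0.05", "0.1")   # mirrors the Array[String] input

# Placeholder standing in for the downstream map-building step.
build_map_with_global_error <- function(dat, error) {
  list(data = dat, assumed_global_error = error)
}

dat  <- "regenotyped_dataset"       # stand-in for the re-genotyped markers
maps <- lapply(as.numeric(global_errors), function(err) {
  build_map_with_global_error(dat, error = err)
})
names(maps) <- paste0("global_error_", global_errors)
```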
33 changes: 15 additions & 18 deletions tasks/JointReports.wdl
@@ -33,7 +33,7 @@ task JointAllReports{
supermassaPolyMaps <- str_split("~{sep=';' supermassaPolyMaps}", ";", simplify = TRUE)
all_files <- c(SNPCallerPolyMapsEmp, updogPolyMaps, polyradPolyMaps, supermassaPolyMaps)
all_files <- all_files[-which(all_files == "")]
if(length(which(all_files == "")) > 0) all_files <- all_files[-which(all_files == "")]
system("mkdir results_all")
@@ -64,7 +64,7 @@ task JointAllReports{
list_files <- untar(files[[i]][[j]], exdir = path_dir, list = T)
system(paste0("mv ",path_dir, "/",list_files[1], "*_map_report.tsv.gz ", path_dir, "/maps"))
system(paste0("mv ",path_dir, "/",list_files[1], "*_times_report.tsv.gz ", path_dir, "/times"))
system(paste0("mv ",path_dir, "/",list_files[1], "*.RData ", path_dir, "/RDatas"))
system(paste0("mv ",path_dir, "/",list_files[1], "*.rds ", path_dir, "/RDatas"))
if(!grepl("gusmap", list_files[1])){
system(paste0("mv ",path_dir, "/",list_files[1], "*_filters_report.tsv.gz ", path_dir, "/filters"))
system(paste0("mv ",path_dir, "/",list_files[1], "*_errors_report.tsv.gz ", path_dir, "/errors"))
@@ -150,7 +150,6 @@ task JointReports{
library(tidyr)
library(stringr)
library(vroom)
library(largeList)
SNPCaller <- str_split("~{sep=";" SNPCaller}", ";", simplify = T)
updog <- str_split("~{sep=";" updog}", ";", simplify = T)
@@ -168,7 +167,7 @@
list_files <- untar(files[[i]][[j]], exdir = path_dir, list = T)
system(paste0("mv ",path_dir, "/",list_files[1], "*_map_report.tsv.gz ", path_dir, "/maps"))
system(paste0("mv ",path_dir, "/",list_files[1], "*_times_report.tsv.gz ", path_dir, "/times"))
system(paste0("mv ",path_dir, "/",list_files[1], "*.RData ", path_dir, "/RDatas"))
system(paste0("mv ",path_dir, "/",list_files[1], "*.rds ", path_dir, "/RDatas"))
if(!grepl("gusmap", list_files[1])){
system(paste0("mv ",path_dir, "/",list_files[1], "*_filters_report.tsv.gz ", path_dir, "/filters"))
system(paste0("mv ",path_dir, "/",list_files[1], "*_errors_report.tsv.gz ", path_dir, "/errors"))
@@ -212,11 +211,11 @@
class(RDatas[[i]]) <- "list"
}
saveList(RDatas, file = "sequences_emp.llo", append=FALSE, compress=TRUE)
saveRDS(RDatas, file = "sequences_emp.rds")
new_names <- names(all_RDatas)
vroom_write(as.data.frame(new_names), "names.tsv.gz")
save(gusmap_RDatas, file = "gusmap_RDatas.RData")
saveRDS(gusmap_RDatas, file = "gusmap_RDatas.rds")
# Outputs
vroom_write(errors, "data1_depths_geno_prob.tsv.gz", num_threads = ~{max_cores})
@@ -225,7 +224,7 @@
vroom_write(times, "data4_times.tsv.gz", num_threads = ~{max_cores})
system("mkdir EmpiricalReads_results")
system("mv gusmap_RDatas.RData sequences_emp.llo data1_depths_geno_prob.tsv.gz data2_maps.tsv.gz data3_filters.tsv.gz data4_times.tsv.gz names.tsv.gz EmpiricalReads_results")
system("mv gusmap_RDatas.rds sequences_emp.rds data1_depths_geno_prob.tsv.gz data2_maps.tsv.gz data3_filters.tsv.gz data4_times.tsv.gz names.tsv.gz EmpiricalReads_results")
system("tar -czvf EmpiricalReads_results.tar.gz EmpiricalReads_results")
RSCRIPT
@@ -275,7 +274,6 @@ task JointReportsSimu {
library(tidyr)
library(stringr)
library(vroom)
library(largeList)
library(vcfR)
SNPCaller <- str_split("~{sep=";" SNPCaller}", ";", simplify = T)
@@ -296,7 +294,7 @@
list_files <- untar(files[[i]][[j]], exdir = path_dir, list = T)
system(paste0("mv ",path_dir, "/",list_files[1], "*_map_report.tsv.gz ", path_dir, "/maps"))
system(paste0("mv ",path_dir, "/",list_files[1], "*_times_report.tsv.gz ", path_dir, "/times"))
system(paste0("mv ",path_dir, "/",list_files[1], "*.RData ", path_dir, "/RDatas"))
system(paste0("mv ",path_dir, "/",list_files[1], "*.rds ", path_dir, "/RDatas"))
if(!grepl("gusmap", list_files[1])){
system(paste0("mv ",path_dir, "/",list_files[1], "*_filters_report.tsv.gz ", path_dir, "/filters"))
system(paste0("mv ",path_dir, "/",list_files[1], "*_errors_report.tsv.gz ", path_dir, "/errors"))
@@ -361,11 +359,11 @@
class(RDatas[[i]]) <- "list"
}
saveList(RDatas, file = "data6_RDatas.llo", append=FALSE, compress=TRUE)
saveRDS(RDatas, file = "data6_RDatas.rds")
new_names <- names(all_RDatas)
vroom_write(as.data.frame(new_names), "names.tsv.gz")
save(gusmap_RDatas, file = "gusmap_RDatas.RData")
saveRDS(gusmap_RDatas, file = "gusmap_RDatas.rds")
# Outputs
vroom_write(tsvs[[3]], "data1_depths_geno_prob.tsv.gz", num_threads = ~{max_cores})
Expand Down Expand Up @@ -401,8 +399,8 @@ task JointReportsSimu {
File data3_filters = "data3_filters.tsv.gz"
File data4_times = "data4_times.tsv.gz"
File data5_SNPCall_efficiency = "data5_SNPCall_efficiency.tsv.gz"
File data6_RDatas = "data6_RDatas.llo"
File data7_gusmap = "gusmap_RDatas.RData"
File data6_RDatas = "data6_RDatas.rds"
File data7_gusmap = "gusmap_RDatas.rds"
File data8_names = "names.tsv.gz"
File data10_counts = "data10_CountVariants.tsv.gz"
}
@@ -432,7 +430,6 @@ task JointTablesSimu{
R --vanilla --no-save <<RSCRIPT
library(tidyverse)
library(largeList)
library(vroom)
datas <- list()
@@ -456,9 +453,9 @@
for(i in 1:length(datas[[j]])){
temp <- readList(datas[[j]][i])
if(i == 1){
saveList(temp, file="sequences.llo", append = F, compress = T)
saveRDS(temp, file="sequences.rds", append = F, compress = T)
} else {
saveList(temp, file="sequences.llo", append = T, compress = T)
saveRDS(temp, file="sequences.rds", append = T, compress = T)
}
}
} else if(j == 7){
@@ -467,7 +464,7 @@
Rdata_lst[[i]] <- get(temp)
}
Rdatas <- do.call(c, Rdata_lst)
save(Rdatas, file = "gusmap_RDatas.RData")
saveRDS(Rdatas, file = "gusmap_RDatas.rds")
} else {
for(i in 1:length(datas[[j]])){
data_lst[[i]] <- vroom(datas[[j]][i], delim = "\t")
@@ -492,7 +489,7 @@
vroom_write(data.names, "names.tsv.gz")
system("mkdir SimulatedReads_results_depth~{depth}")
system("mv gusmap_RDatas.RData sequences.llo data1_depths_geno_prob.tsv.gz \
system("mv gusmap_RDatas.rdssequences.rds data1_depths_geno_prob.tsv.gz \
data2_maps.tsv.gz data3_filters.tsv.gz data4_times.tsv.gz data5_SNPCall_efficiency.tsv.gz data10_counts.tsv.gz \
simu_haplo.tsv.gz names.tsv.gz ~{sep=" " plots} ~{sep=" " positions} SimulatedReads_results_depth~{depth}")
system("tar -czvf SimulatedReads_results_depth~{depth}.tar.gz SimulatedReads_results_depth~{depth}")
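Note that base R's `saveRDS()` has no `append` argument, unlike `largeList::saveList()`, which these tasks no longer load, so combining several per-run list files into one `sequences.rds` generally means reading everything first and serializing once. A minimal sketch under that assumption, with illustrative file names:

```r
# Merge per-run list files into a single sequences.rds with one write,
# replacing the largeList append-style loop.
rds_files <- c("run1_sequences.rds", "run2_sequences.rds")  # illustrative inputs

sequences <- list()
for (f in rds_files) {
  sequences <- c(sequences, readRDS(f))  # each file is assumed to hold a plain list
}
saveRDS(sequences, file = "sequences.rds", compress = TRUE)
```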
8 changes: 4 additions & 4 deletions tasks/bcftools.wdl
@@ -60,9 +60,9 @@ task FixTasselVCF {
command <<<

sed 's/PL,Number=./PL,Number=G/g' ~{vcf_file} > tassel_fix.vcf
sed 's/AD,Number=./AD,Number=R/g' tassel_fix.vcf > tassel_fix.vcf
sed 's/AF,Number=./AF,Number=A/g' tassel_fix.vcf > tassel_fix.vcf
sed '/INFO=<ID=AF/a ##INFO=<ID=QualityScore,Number=1,Type=Float,Description="Tassel specific score">' tassel_fix.vcf > tassel_fix.vcf
sed -i 's/AD,Number=./AD,Number=R/g' tassel_fix.vcf
sed -i 's/AF,Number=./AF,Number=A/g' tassel_fix.vcf
sed -i '/INFO=<ID=AF/a ##INFO=<ID=QualityScore,Number=1,Type=Float,Description="Tassel specific score">' tassel_fix.vcf

grep ">" ~{reference} > chrs
sed -i 's/>//' chrs
@@ -97,6 +97,6 @@ task FixTasselVCF {
}

output {
File vcf_fixed = "tassel_fix_chr.vcf"
File vcf_fixed = "tassel_fix_chr.vcf.gz"
}
}
11 changes: 5 additions & 6 deletions tasks/gusmap.wdl
@@ -39,7 +39,7 @@ task GusmapReport {
vroom::vroom_write(info[[2]], "~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_map_report.tsv.gz", num_threads = ~{max_cores})
vroom::vroom_write(times, "~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_times_report.tsv.gz", num_threads = ~{max_cores})
map_out <- info[[1]]
save(map_out, file= "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}.RData")
saveRDS(map_out, file= "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}.rds")
RSCRIPT
@@ -66,7 +66,7 @@
output {
File maps_report = "~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_map_report.tsv.gz"
File maps_RData = "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}.RData"
File maps_RData = "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}.rds"
File times = "~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_times_report.tsv.gz"
}
}
@@ -94,8 +94,7 @@ task GusmapReportForSimulated {
library(GUSMap)
library(Reads2MapTools)
simu_onemap_obj <- load("~{simu_onemap_obj}")
simu_onemap_obj <- get(simu_onemap_obj)
simu_onemap_obj <- readRDS("~{simu_onemap_obj}")
if(tail(strsplit("~{vcf_file}", "[.]")[[1]],1) =="gz") {
vcf.temp <- paste0("~{SNPCall_program}",".vcf")
@@ -139,7 +138,7 @@
RDatas_joint[[2]] <- info_correct[[1]]
names(RDatas_joint) <- c("map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_TRUE",
"map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_FALSE")
save(RDatas_joint, file= "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_~{seed}_~{depth}.RData")
saveRDS(RDatas_joint, file= "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_~{seed}_~{depth}.rds")
# Joint times data.frames
times_temp <- data.frame(seed = ~{seed}, depth = ~{depth}, SNPCall = "~{SNPCall_program}",
@@ -174,7 +173,7 @@
output {
File maps_report = "~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_~{seed}_~{depth}_map_report.tsv.gz"
File maps_RData = "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_~{seed}_~{depth}.RData"
File maps_RData = "map_~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_~{seed}_~{depth}.rds"
File times = "~{SNPCall_program}_~{CountsFrom}_~{GenotypeCall_program}_~{seed}_~{depth}_times_report.tsv.gz"
}
}
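The switch from `.RData` to `.rds` outputs in this task follows the usual base-R pattern sketched below; the object and file names are illustrative only.

```r
map_out <- list(rf = 0.1)  # illustrative object

# Old pattern: save()/load() restores the object under its original name,
# so callers must look it up again with get().
save(map_out, file = "map.RData")
obj_name <- load("map.RData")   # returns the character string "map_out"
restored <- get(obj_name)

# New pattern: saveRDS()/readRDS() round-trips the object itself.
saveRDS(map_out, file = "map.rds")
restored <- readRDS("map.rds")
```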