Skip to content

Commit

Permalink
group upgma
Browse files Browse the repository at this point in the history
  • Loading branch information
Cristianetaniguti committed May 25, 2021
1 parent d94e097 commit 59246e8
Show file tree
Hide file tree
Showing 12 changed files with 355 additions and 89 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: onemap
Title: Construction of Genetic Maps in Experimental Crosses: Full-Sib,
RILs, F2 and Backcrosses
Version: 2.4.0
Version: 2.5.0
Description: Analysis of molecular marker data from model (backcrosses,
F2 and recombinant inbred lines) and non-model systems (i. e.
outcrossing species). For the later, it allows statistical
Expand Down
13 changes: 13 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# Generated by roxygen2: do not edit by hand

S3method(plot,group.upgma)
S3method(plot,onemap)
S3method(plot,onemap_progeny_haplotypes)
S3method(plot,onemap_progeny_haplotypes_counts)
S3method(plot,onemap_segreg_test)
S3method(print,compare)
S3method(print,group)
S3method(print,group.upgma)
S3method(print,group_seq)
S3method(print,onemap)
S3method(print,onemap_bin)
Expand All @@ -32,6 +34,7 @@ export(filter_prob)
export(find_bins)
export(group)
export(group_seq)
export(group_upgma)
export(haldane)
export(kosambi)
export(make_seq)
Expand Down Expand Up @@ -78,6 +81,7 @@ import(ggplot2)
import(parallel)
import(tidyr)
importFrom(RColorBrewer,brewer.pal)
importFrom(dendextend,color_branches)
importFrom(ggpubr,ggarrange)
importFrom(grDevices,bmp)
importFrom(grDevices,colorRamp)
Expand All @@ -94,6 +98,7 @@ importFrom(graphics,axis)
importFrom(graphics,layout)
importFrom(graphics,lines)
importFrom(graphics,par)
importFrom(graphics,pie)
importFrom(graphics,plot)
importFrom(graphics,points)
importFrom(graphics,segments)
Expand All @@ -104,11 +109,19 @@ importFrom(methods,is)
importFrom(plotly,ggplotly)
importFrom(reshape2,dcast)
importFrom(reshape2,melt)
importFrom(stats,as.dendrogram)
importFrom(stats,as.dist)
importFrom(stats,chisq.test)
importFrom(stats,cor.test)
importFrom(stats,cutree)
importFrom(stats,dist)
importFrom(stats,hclust)
importFrom(stats,lm)
importFrom(stats,na.omit)
importFrom(stats,predict)
importFrom(stats,qchisq)
importFrom(stats,quantile)
importFrom(stats,rect.hclust)
importFrom(stats,rf)
importFrom(stats,setNames)
importFrom(utils,browseURL)
Expand Down
203 changes: 203 additions & 0 deletions R/group_upgma.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
#' Assign markers to linkage groups
#'
#' Identifies linkage groups of markers using the results of two-point
#' (pairwise) analysis and UPGMA method. Function adapted from MAPpoly package
#' written by Marcelo Mollinari.
#'
#' @param input.seq an object of class \code{mappoly.rf.matrix}
#'
#' @param expected.groups when available, inform the number of expected
#' linkage groups (i.e. chromosomes) for the species
#'
#' @param inter if \code{TRUE} (default), plots a dendrogram highlighting the
#' expected groups before continue
#'
#' @param comp.mat if \code{TRUE}, shows a comparison between the reference
#' based and the linkage based grouping, if the sequence information is
#' available (default = FALSE)
#'
#' @param verbose logical. If \code{TRUE} (default), current progress is shown;
#' if \code{FALSE}, no output is produced
#'
#' @return Returns an object of class \code{group}, which is a list
#' containing the following components:
#' \item{data.name}{the referred dataset name}
#' \item{hc.snp}{a list containing information related to
#' the UPGMA grouping method}
#' \item{expected.groups}{the number of expected linkage groups}
#' \item{groups.snp}{the groups to which each of the markers belong}
#' \item{seq.vs.grouped.snp}{comparison between the genomic group information
#' (when available) and the groups provided by \code{group_upgma}}
#' \item{LOD}{minimum LOD Score to declare linkage.}
#' \item{max.rf}{maximum recombination fraction to declare linkage.}
#' \item{twopt}{name of the object of class \code{rf.2ts}
#' used as input, i.e., containing information used to assign
#' markers to linkage groups.}
#'
#' @examples
#'
#' data("vcf_example_out")
#' twopts <- rf_2pts(vcf_example_out)
#' input.seq <- make_seq(twopts, "all")
#' lgs <- group_upgma(input.seq, expected.groups = 3, comp.mat=T)
#' plot(lgs)
#'
#' @author Marcelo Mollinari, \email{mmollin@ncsu.edu}
#' @author Cristiane Taniguti \email{chtaniguti@usp.br}
#'
#' @references
#' Mollinari, M., and Garcia, A. A. F. (2019) Linkage
#' analysis and haplotype phasing in experimental autopolyploid
#' populations with high ploidy level using hidden Markov
#' models, _G3: Genes, Genomes, Genetics_.
#' \doi{10.1534/g3.119.400378}
#'
#' @importFrom graphics abline pie
#' @importFrom stats as.dendrogram as.dist cutree hclust lm predict quantile rect.hclust
#' @importFrom dendextend color_branches
#'
#' @export
group_upgma <- function(input.seq, expected.groups = NULL,
inter = TRUE, comp.mat = FALSE, verbose = TRUE)
{
## checking for correct objects
if(!any(is(input.seq,"sequence")))
stop(deparse(substitute(input.seq))," is not an object of class 'sequence'")

## extracting data
if(is(input.seq$data.name, "outcross") | is(input.seq$data.name, "f2"))
{
## making a list with necessary information
n.mrk <- length(input.seq$seq.num)
LOD <- lapply(input.seq$twopt$analysis,
function(x, w){
m<-matrix(0, length(w), length(w))
for(i in 1:(length(w)-1)){
for(j in (i+1):length(w)){
z<-sort(c(w[i],w[j]))
m[j,i]<-m[i,j]<-x[z[1], z[2]]
}
}
return(m)
}, input.seq$seq.num
)
mat<-t(get_mat_rf_out(input.seq, LOD=TRUE, max.rf = 0.501, min.LOD = -0.1))
} else {
#if(input.seq$seq.rf[1] == -1 || is.null(input.seq$seq.like))
#stop("You must estimate parameters before running 'rf_graph_table' ")
## making a list with necessary information
n.mrk <- length(input.seq$seq.num)
LOD<-matrix(0, length(input.seq$seq.num), length(input.seq$seq.num))
for(i in 1:(length(input.seq$seq.num)-1)){
for(j in (i+1):length(input.seq$seq.num)){
z<-sort(c(input.seq$seq.num[i],input.seq$seq.num[j]))
LOD[j,i]<-LOD[i,j]<-input.seq$twopt$analysis[z[1], z[2]]
}
}
mat<-t(get_mat_rf_in(input.seq, LOD=TRUE, max.rf = 0.501, min.LOD = -0.1))
}

diag(mat) <- 0
mn<-input.seq$data.name$CHROM[input.seq$seq.num]
mn[is.na(mn)]<-"NH"
dimnames(mat)<-list(mn, mn)

hc.snp<-hclust(as.dist(mat), method = "average")
ANSWER <- "flag"
if(interactive() && inter)
{
dend.snp <- as.dendrogram(hc.snp)
while(substr(ANSWER, 1, 1) != "y" && substr(ANSWER, 1, 1) != "yes" && substr(ANSWER, 1, 1) != "Y" && ANSWER !="")
{
dend1 <- dendextend::color_branches(dend.snp, k = expected.groups)
plot(dend1, leaflab = "none")
if(is.null(expected.groups))
expected.group <- as.numeric(readline("Enter the number of expected groups: "))
z<-rect.hclust(hc.snp, k = expected.groups, border = "red")
groups.snp <- cutree(tree = hc.snp, k = expected.groups)
xy<-sapply(z, length)
xt<-as.numeric(cumsum(xy)-ceiling(xy/2))
yt<-.1
points(x = xt, y = rep(yt, length(xt)), cex = 6, pch = 20, col = "lightgray")
text(x = xt, y = yt, labels = pmatch(xy, table(groups.snp, useNA = "ifany")), adj = .5)
ANSWER <- readline("Enter 'Y/n' to proceed or update the number of expected groups: ")
if(substr(ANSWER, 1, 1) == "n" | substr(ANSWER, 1, 1) == "no" | substr(ANSWER, 1, 1) == "N")
stop("You decided to stop the function.")
if(substr(ANSWER, 1, 1) != "y" && substr(ANSWER, 1, 1) != "yes" && substr(ANSWER, 1, 1) != "Y" && ANSWER !="")
expected.groups <- as.numeric(ANSWER)
}
}
if(is.null(expected.groups))
stop("Inform the 'expected.groups' or use 'inter = TRUE'")

# Distribution of SNPs into linkage groups
seq.vs.grouped.snp <- NULL
if(all(unique(mn) == "NH") & comp.mat)
{
comp.mat <- FALSE
seq.vs.grouped.snp <- NULL
warning("There is no physical reference to generate a comparison matrix")
}
groups.snp <- cutree(tree = hc.snp, k = expected.groups)
if(comp.mat){
seq.vs.grouped.snp<-matrix(0, expected.groups, length(na.omit(unique(mn)))+1,
dimnames = list(1:expected.groups, c(na.omit(unique(mn)),"NH")))
for(i in 1:expected.groups)
{
x<-table(names(which(groups.snp==i)))
seq.vs.grouped.snp[i,names(x)]<-x
}
idtemp2 <- apply(seq.vs.grouped.snp, 1, which.max)
seq.vs.grouped.snp<-cbind(seq.vs.grouped.snp[,unique(idtemp2)], seq.vs.grouped.snp[,"NH"])
cnm<-colnames(seq.vs.grouped.snp)
cnm[cnm==""]<-"NoChr"
colnames(seq.vs.grouped.snp)<-cnm
} else {
seq.vs.grouped.snp <- NULL
}
names(groups.snp)<-input.seq$seq.num
structure(list(data.name = input.seq$data.name,
hc.snp = hc.snp,
expected.groups = expected.groups,
n.groups = length(unique(groups.snp)),
groups = groups.snp,
seq.num =input.seq$seq.num,
seq.vs.grouped.snp = seq.vs.grouped.snp,
LOD = input.seq$LOD,
max.rf = input.seq$max.rf,
twopt <- input.seq$twopt),
class = "group.upgma")
}

#' @export
print.group.upgma <- function(x, detailed = TRUE, ...) {
cat(" This is an object of class 'group.upgma'\n ------------------------------------------\n")
## criteria
cat(" Criteria used to assign markers to groups:\n\n")
cat(" - Number of markers: ", length(x$groups), "\n")
cat(" - Number of linkage groups: ", length(unique(x$groups)), "\n")
cat(" - Number of markers per linkage groups: \n")
w<-data.frame(table(x$groups, useNA = "ifany"))
colnames(w) = c(" group", "n.mrk")
print (w, row.names = FALSE)
cat(" ------------------------------------------\n")

## printing summary
if(!is.null(x$seq.vs.grouped.snp)){
print(x$seq.vs.grouped.snp)
cat(" ------------------------------------------\n")
}
}

#' @export
plot.group.upgma <- function(x, ...) {
dend <- as.dendrogram(x$hc.snp)
dend1 <- dendextend::color_branches(dend, k = x$expected.groups)
plot(dend1, leaflab = "none")
z<-rect.hclust(x$hc.snp, k = x$expected.groups, border = "red")
xy<-sapply(z, length)
xt<-as.numeric(cumsum(xy)-ceiling(xy/2))
yt<-.1
points(x = xt, y = rep(yt, length(xt)), cex = 6, pch = 20, col = "lightgray")
text(x = xt, y = yt, labels = pmatch(xy, table(x$groups, useNA = "ifany")), adj = .5)
}
12 changes: 6 additions & 6 deletions R/make_seq.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@
##' of class \code{compare} or \code{try}, this argument indicates which
##' combination of linkage phases should be chosen, for the particular order
##' given by argument \code{arg}. In both cases, \code{NULL} (default) makes the
##' best combination to be taken. If \code{input.obj} is of class, \code{group}
##' or \code{order}, this argument has no effect.
##' best combination to be taken. If \code{input.obj} is of class, \code{group},
##' \code{group.upgma} or \code{order}, this argument has no effect.
##' @param data.name a \code{string} indicating the name of the object which
##' contains the raw data. This does not have to be defined by the
##' user: it is here for compatibility issues when calling \code{make_seq} from
Expand Down Expand Up @@ -112,8 +112,8 @@
make_seq <-
function(input.obj, arg = NULL, phase = NULL, data.name = NULL, twopt = NULL) {
# checking for correct object
if(!(is(input.obj, c("onemap", "rf_2pts", "group", "compare", "try", "order"))))
stop(deparse(substitute(input.obj))," is not an object of class 'onemap', 'rf_2pts', 'group', 'compare', 'try' or 'order'")
if(!(is(input.obj, c("onemap", "rf_2pts", "group", "compare", "try", "order", "group.upgma"))))
stop(deparse(substitute(input.obj))," is not an object of class 'onemap', 'rf_2pts', 'group', 'group.upgma','compare', 'try' or 'order'")
if(is(input.obj, "onemap")){
if (length(arg) == 1 && is.character(arg)) {
seq.num <- which(input.obj$CHROM == arg)
Expand Down Expand Up @@ -158,9 +158,9 @@ make_seq <-
seq.rf <- -1
seq.like <- NULL
if(is.null(twopt)) twopt <- input.obj
} else if (is(input.obj, "group")){
} else if (is(input.obj, "group") | is(input.obj, "group.upgma")){
if(length(arg) == 1 && is.numeric(arg) && arg <= input.obj$n.groups) seq.num <- input.obj$seq.num[which(input.obj$groups == arg)]
else stop("for this object of class 'group', \"arg\" must be an integer less than or equal to ",input.obj$n.groups)
else stop("for this object of class 'group' or 'group.upgma', \"arg\" must be an integer less than or equal to ",input.obj$n.groups)
seq.phases <- -1
seq.rf <- -1
seq.like <- NULL
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/onemap)](https://cran.r-project.org/package=onemap)
[![CRAN_monthly_downloads](https://cranlogs.r-pkg.org/badges/onemap)](https://cranlogs.r-pkg.org/badges/onemap)

# OneMap <img src="https://user-images.githubusercontent.com/7572527/119237022-0b19a400-bb11-11eb-9d45-228a59f22a1a.png" align="right" width="160"/>
# OneMap <img src="https://user-images.githubusercontent.com/7572527/119237022-0b19a400-bb11-11eb-9d45-228a59f22a1a.png" align="right" width="200"/>

`OneMap` is a software for constructing genetic maps in experimental
crosses: full-sib, RILs, F2 and backcrosses. It was developed by
Expand Down
31 changes: 0 additions & 31 deletions inst/extdata/mapfile_f2.txt

This file was deleted.

43 changes: 0 additions & 43 deletions inst/extdata/mapfile_out.txt

This file was deleted.

Loading

0 comments on commit 59246e8

Please sign in to comment.