From aa0a1dcea9654c8a04c61fc5a1c791a4e1e60891 Mon Sep 17 00:00:00 2001 From: jhsiao999 Date: Thu, 12 Sep 2019 20:30:13 -0500 Subject: [PATCH 1/6] more fixes...to prepare for submission --- DESCRIPTION | 24 +++++++---------- NAMESPACE | 1 - R/circ_dist.R | 5 +++- R/cycle_npreg.R | 32 +++++++++------------- R/data.R | 49 ++++++++++++++++++---------------- R/data_transform_quantile.R | 7 +++-- R/fit_cyclic_many.R | 8 +++--- R/fit_cyclic_one.R | 32 +++++++++++----------- R/intensity2circle.R | 2 ++ R/shift_origin.R | 2 ++ man/cycle_npreg_insample.Rd | 2 +- man/fit_bspline.Rd | 1 + man/fit_cyclical_many.Rd | 1 + man/fit_loess.Rd | 1 + man/fit_trendfilter_generic.Rd | 2 +- man/intensity2circle.Rd | 2 ++ 16 files changed, 85 insertions(+), 86 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1fc7664..069f441 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,17 +3,15 @@ Type: Package Package: peco Version: 0.99.0 Date: 2019-09-11 -Title: A supervised approach for **P**r**e**dicting **c**ell cycle - pr**o**gression using scRNA-seq data -Authors@R: c(person("Chiaowen Joyce","Hsiao",email="joyce.hsiao1@gmail.com", - role=c("aut","cre"), - comment = c(ORCID = "0000-0001-8961-7522")), - person("Matthew","Stephens", email="stephens999@gmail.com", - role="aut", comment = c(ORCID = "0000-0001-5397-9257")), - person("John","Blischak", email = "jdblischak@gmail.com", - role="ctb", comment = c(ORCID = "0000-0003-2634-9879")), - person("Peter","Carbonetto", email = "peter.carbonetto@gmail.com", - role="ctb", comment = c(ORCID = "0000-0003-1144-6780"))) +Title: A Supervised Approach for **P**r**e**dicting **c**ell Cycle + Pr**o**gression using scRNA-seq data +Authors@R: c( + person("Chiaowen Joyce","Hsiao", email="joyce.hsiao1@gmail.com", + role=c("aut","cre")), + person("Matthew", "Stephens", email="stephens999@gmail.com", role="aut"), + person("John", "Blischak", email = "jdblischak@gmail.com", role="ctb"), + person("Peter", "Carbonetto", email = "peter.carbonetto@gmail.com", + role="ctb")) Maintainer: Chiaowen Joyce Hsiao Description: Our approach provides a way to assign continuous cell cycle phase using scRNA-seq data, and consequently, allows to identify cyclic trend @@ -31,10 +29,8 @@ Imports: circular, conicfit, genlasso, - ggplot2, graphics, - MASS, - Matrix, + methods, parallel, stats Suggests: diff --git a/NAMESPACE b/NAMESPACE index 94a50d2..89593ff 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,7 +18,6 @@ importFrom(conicfit,EllipseDirectFit) importFrom(conicfit,Residuals.ellipse) importFrom(conicfit,calculateEllipse) importFrom(genlasso,cv.trendfilter) -importFrom(genlasso,predict.genlasso) importFrom(genlasso,trendfilter) importFrom(graphics,plot) importFrom(graphics,points) diff --git a/R/circ_dist.R b/R/circ_dist.R index 54b675f..71404dd 100644 --- a/R/circ_dist.R +++ b/R/circ_dist.R @@ -1,3 +1,5 @@ +#' @name circ_dist +#' #' @title Pairwise distance between two circular variables #' #' @description We define distance between two angles: the minimum of @@ -24,11 +26,12 @@ #' #' @author Joyce Hsiao, Matthew Stephens #' @export -#' circ_dist <- function(y1,y2) { pmin(abs(y2-y1), abs(2*pi-(abs(y2-y1)))) } +#' @name rotation +#' #' @title Rotate circular variable shift_var to minimize distance #' between ref_var and shift_var #' diff --git a/R/cycle_npreg.R b/R/cycle_npreg.R index 1447581..b57f148 100644 --- a/R/cycle_npreg.R +++ b/R/cycle_npreg.R @@ -5,6 +5,8 @@ # to another gene expression dataset to infer an angle or cell cycle # phase for each cell. +#' @name cycle_npreg_insample +#' #' @title Obtain cyclic trend estimates from the training data #' #' @description Estimates cyclic trends of gene expression levels @@ -39,6 +41,7 @@ #' trends of gene express levels for each gene.} #' #' @examples +#' library(Biobase) #' # import data #' data(eset_sub) #' @@ -114,9 +117,6 @@ #' @author Joyce Hsiao #' #' @export -#' -#' @import methods Biobase MASS Matrix ggplot2 -NULL cycle_npreg_insample <- function(Y, theta, ncores=4, polyorder=2, @@ -142,6 +142,8 @@ cycle_npreg_insample <- function(Y, theta, funs_est=initial_mstep$funs)) } +#' @name cycle_npreg_outsample +#' #' @title Predict test-sample ordering using training labels (no update) #' #' @description Apply the estimates of cycle_npreg_insample to another @@ -276,9 +278,6 @@ cycle_npreg_insample <- function(Y, theta, #' title("Predicting cell cycle phase for NA18511", outer=TRUE) #' #' @export -#' -#' @import methods Biobase MASS Matrix ggplot2 -NULL cycle_npreg_outsample <- function(Y_test, sigma_est, funs_est, @@ -328,6 +327,8 @@ cycle_npreg_outsample <- function(Y_test, #------ Supporting functions +#' @name initialize_grids +#' #' @title For prediction, initialize grid points for cell cycle phase #' on a circle. #' @@ -346,10 +347,6 @@ cycle_npreg_outsample <- function(Y_test, #' #' @author Joyce Hsiao #' -#' @importFrom stats prcomp -#' @importFrom circular coord2rad -#' @import methods Biobase MASS Matrix ggplot2 -NULL initialize_grids <- function(Y, grids=100, method.grid=c("pca", "uniform")) { @@ -382,6 +379,8 @@ initialize_grids <- function(Y, grids=100, return(theta_initial) } +#' @name cycle_npreg_loglik +#' #' @title Infer angles or cell cycle phase based on gene expression data #' #' @param Y Gene by sample expression matrix. @@ -405,9 +404,6 @@ initialize_grids <- function(Y, grids=100, #' to each bin.} #' #' @author Joyce Hsiao -#' @importFrom stats dnorm -#' @import methods Biobase MASS Matrix ggplot2 -NULL cycle_npreg_loglik <- function(Y, sigma_est, funs_est, grids=100, method.grid=c("pca", "uniform")) { @@ -467,6 +463,8 @@ cycle_npreg_loglik <- function(Y, sigma_est, funs_est, prob_per_cell_by_celltimes=prob_per_cell_by_celltimes)) } +#' @name cycle_npreg_mstep +#' #' @title Estimate parameters of the cyclic trends #' #' @description This is used in both cycle_npreg_insample (training @@ -500,14 +498,10 @@ cycle_npreg_loglik <- function(Y, sigma_est, funs_est, #' each gene} #' \item{funs}{Estimated cyclic functions} #' -#' -#' @author Joyce Hsiao -#' #' @importFrom assertthat assert_that #' @importFrom parallel mclapply -#' @importFrom stats approxfun -#' @import methods Biobase MASS Matrix ggplot2 -NULL +#' @importFrom stats dnorm approxfun +#' @author Joyce Hsiao cycle_npreg_mstep <- function(Y, theta, method.trend=c("trendfilter", "loess", "bspline"), polyorder=2, diff --git a/R/data.R b/R/data.R index aa3ebaa..b886549 100644 --- a/R/data.R +++ b/R/data.R @@ -1,16 +1,16 @@ -#' Molecule counts of the 101 significant cyclical genes in 888 samples analyzed -#' in the study. +#' Molecule counts of the 101 significant cyclical genes in the 888 samples +#' analyzed in the study. #' #' An ExpressionSet object (require Biobase package) including -#' molecule count data after gene and smaple filtering. The `phenotypeData()` slot -#' contains sample phenotype information and the `featureData()` slot contains -#' gene feature information. +#' molecule count data after gene and smaple filtering. The `phenotypeData()` +#' slot contains sample phenotype information and the `featureData()` slot +#' contains gene feature information. #' -#' @format An ExpressionSet object with 888 samples and the 101 significant cyclic genes, +#' @format An ExpressionSet object with 888 samples and the 101 significant +#' cyclic genes, #' \describe{ -#' \item{pData(eset_sub)$theta}{Inferred angles of each cell along +#' \item{theta}{Inferred angles of each cell along #' a circle, also known as FUCCI phase.} -#' \item{exprs(eset_sub)}{Molecule counts of the 101 significant cyclical genes.} #' } #' #' @docType data @@ -20,13 +20,14 @@ #' @title Traing model results among samples from 5 individuals. #' -#' @description Pre-computed results. Applied \code{cycle_npreg_insample} to +#' @description Pre-computed results. Applied \emph{cycle_npreg_insample} to #' obtain gene-specific cyclic trend parameters using samples from 5 #' individuals #' #' @format A list with the follwing elements #' \describe{ -#' \item{Y}{a data.frame (gene by sample) of quantile-normailzed gene expression values} +#' \item{Y}{a data.frame (gene by sample) of quantile-normailzed gene +#' expression values} #' \item{theta}{a vector of cell cycl phase values (range between 0 to 2pi)} #' \item{sigma_est}{a vector of estimated standard errors} #' \item{funs_est}{a list of estimated cyclic functions} @@ -40,30 +41,32 @@ #' @title Results of predicting cell cycle phase for samples from NA18511. #' -#' @description Pre-computed results. Applied \code{cycle_npreg_outsample} and -#' results stored in \code{fit_train} to predict cell cycle phase for +#' @description Pre-computed results. Applied \emph{cycle_npreg_outsample} and +#' results stored in \emph{fit_train} to predict cell cycle phase for #' single-cell samples of NA19098. #' #' @format A list with the follwing elements #' \describe{ -#' \item{Y}{a data.frame (gene by sample) of quantile-normailzed gene expression values} -#' \item{cell_times_est}{a vector of estimated cell cycl phase values (range between 0 to 2pi)} +#' \item{Y}{a data.frame (gene by sample) of quantile-normailzed gene +#' expression values} +#' \item{cell_times_est}{a vector of estimated cell cycl phase values +#' (range between 0 to 2pi)} #' \item{loglik_est}{a vector of estimated log-likelihoods for each cell} -#' \item{Y_reordered}{Y ordered along columns by \code{cell_times-est}} +#' \item{Y_reordered}{Y ordered along columns by \emph{cell_times-est}} #' \item{cell_times_reordered}{cell_times_est ordered from 0 to 2pi} #' \item{funs_reordered}{A lists of estimated cyclic functions for each gene #' using re-ordered gene expression values (rows of Y_reordered) } -#' \item{mu_reordered}{A data.frame (gene by sample) of estimated expression for each gene -#' across cells in the test data using \code{funs_reordered}} -#' \item{sigma_reordered}{A vector of estimated standard error for each gene in the test data -#' across cells using \code{funs_reordered}} -#' \item{prob_per_cell_by_celltimes}{A data.frame (gene by sample) of log-likelihoods of -#' of gene expression values in the test data} +#' \item{mu_reordered}{A data.frame (gene by sample) of estimated expression +#' for each gene across cells in the test data using \emph{funs_reordered}} +#' \item{sigma_reordered}{A vector of estimated standard error for each gene +#' in the test data across cells using \emph{funs_reordered}} +#' \item{prob_per_cell_by_celltimes}{A data.frame (gene by sample) of +#' log-likelihoods of of gene expression values in the test data} #' } #' #' \describe{ -#' \item{\code{Y_reordered}}{Normalized expression values (log2CPM , )} -#' \item{\code{cell_times_reordered}}{Estimated phase} +#' \item{Y_reordered}{Quntile-normalized expression values} +#' \item{cell_times_reordered}{Estimated phase} #' } #' #' @docType data diff --git a/R/data_transform_quantile.R b/R/data_transform_quantile.R index 22ad2aa..7728b4a 100644 --- a/R/data_transform_quantile.R +++ b/R/data_transform_quantile.R @@ -1,3 +1,5 @@ +#' @name data_transform_quantile +#' #' @title Quantile-normalize log counts of gene expression values #' #' @description @@ -12,6 +14,7 @@ #' #' @examples #' # use our data +#' library(Biobase) #' data(eset_sub) #' #' # normalize expression counts to counts per million @@ -24,10 +27,6 @@ #' #' @author Joyce Hsiao #' @export -#' -#' @importFrom parallel mclapply -#' @import methods Biobase MASS Matrix ggplot2 -NULL data_transform_quantile <- function(Y, ncores=2) { G <- nrow(Y) diff --git a/R/fit_cyclic_many.R b/R/fit_cyclic_many.R index a5a07f3..7e63149 100644 --- a/R/fit_cyclic_many.R +++ b/R/fit_cyclic_many.R @@ -1,3 +1,5 @@ +#' @name fit_cyclical_many +#' #' @title Compute proportation of variance explained by cyclic trends #' in the gene expression levels for each gene. #' @@ -36,6 +38,7 @@ #' @return A vector of proportion of variance explained for each gene. #' #' @examples +#' library(Biobase) #' data(eset_sub) #' pdata <- pData(eset_sub) #' @@ -56,11 +59,6 @@ #' @author Joyce Hsiao #' #' @export -#' -#' @importFrom assertthat assert_that -#' @importFrom parallel mclapply -#' @import methods Biobase MASS Matrix ggplot2 -NULL fit_cyclical_many <- function(Y, theta, polyorder=2, ncores=4) { G <- nrow(Y) N <- ncol(Y) diff --git a/R/fit_cyclic_one.R b/R/fit_cyclic_one.R index 9125657..25509d3 100644 --- a/R/fit_cyclic_one.R +++ b/R/fit_cyclic_one.R @@ -1,3 +1,5 @@ +#' @name fit_trendfilter_generic +#' #' @title Using trendfiltering to estimate cyclic trend of gene #' expression #' @@ -34,6 +36,7 @@ #' trend in the gene expression levels.} #' #' @examples +#' library(Biobase) #' data(eset_sub) #' pdata <- pData(eset_sub) #' @@ -62,14 +65,10 @@ #' abline(h=0, lty=1, col="black", lwd=.7) #' #' @author Joyce Hsiao -#' @export #' -#' @importFrom stats var -#' @importFrom genlasso trendfilter -#' @importFrom genlasso cv.trendfilter -#' @importFrom genlasso predict.genlasso -#' @import methods Biobase MASS Matrix ggplot2 -NULL +#' @importFrom genlasso trendfilter cv.trendfilter +#' @importFrom stats var predict +#' @export fit_trendfilter_generic <- function(yy, polyorder=2) { yy.rep <- rep(yy,3) @@ -88,6 +87,8 @@ fit_trendfilter_generic <- function(yy, polyorder=2) { pve=pve)) } +#' @name fit_bspline +#' #' @title Use bsplies to cyclic trend of gene expression levels #' #' @param yy A vector of gene expression values for one gene. The @@ -102,6 +103,7 @@ fit_trendfilter_generic <- function(yy, polyorder=2) { #' @author Joyce Hsiao #' #' @examples +#' library(Biobase) #' data(eset_sub) #' pdata <- pData(eset_sub) #' @@ -129,12 +131,8 @@ fit_trendfilter_generic <- function(yy, polyorder=2) { #' expression(2*pi))) #' abline(h=0, lty=1, col="black", lwd=.7) #' +#' @importFrom stats smooth.spline var predict #' @export -#' -#' @importFrom stats smooth.spline -#' @importFrom stats predict -#' @import methods Biobase MASS Matrix ggplot2 -NULL fit_bspline <- function(yy, time) { yy.rep <- rep(yy,3) @@ -151,7 +149,9 @@ fit_bspline <- function(yy, time) { pve=pve)) } -#' Use loess to estimate cyclic trends of expression values +#' @name fit_loess +#' +#' @title Use loess to estimate cyclic trends of expression values #' #' @param yy A vector of gene expression values for one gene. The #' expression values are assumed to have been normalized and @@ -165,6 +165,7 @@ fit_bspline <- function(yy, time) { #' @author Joyce Hsiao #' #' @examples +#' library(Biobase) #' data(eset_sub) #' pdata <- pData(eset_sub) #' @@ -192,11 +193,8 @@ fit_bspline <- function(yy, time) { #' expression(2*pi))) #' abline(h=0, lty=1, col="black", lwd=.7) #' +#' @importFrom stats loess var predict #' @export -#' -#' @importFrom stats loess -#' @import methods Biobase MASS Matrix ggplot2 -NULL fit_loess <- function(yy, time) { yy.rep <- rep(yy,3) diff --git a/R/intensity2circle.R b/R/intensity2circle.R index 5cde843..95de601 100644 --- a/R/intensity2circle.R +++ b/R/intensity2circle.R @@ -19,6 +19,8 @@ #' #' @examples #' # use our data +#' library(Biobase) +#' #' data(eset_sub) #' #' # FUCCI scores - log10 transformed sum of intensities that were diff --git a/R/shift_origin.R b/R/shift_origin.R index b80ca2b..10fce77 100644 --- a/R/shift_origin.R +++ b/R/shift_origin.R @@ -1,3 +1,5 @@ +#' @name shift_origin +#' #' @title Shift origin of the angles #' #' @description Shift origin of the angles for visualization diff --git a/man/cycle_npreg_insample.Rd b/man/cycle_npreg_insample.Rd index bd1b381..176cc39 100644 --- a/man/cycle_npreg_insample.Rd +++ b/man/cycle_npreg_insample.Rd @@ -40,8 +40,8 @@ Estimates cyclic trends of gene expression levels using training data. } \examples{ -# import data library(Biobase) +# import data data(eset_sub) # select top 5 cyclic genes diff --git a/man/fit_bspline.Rd b/man/fit_bspline.Rd index 2a350a7..6c19ab8 100644 --- a/man/fit_bspline.Rd +++ b/man/fit_bspline.Rd @@ -21,6 +21,7 @@ estimated cyclic trend. Use bsplies to cyclic trend of gene expression levels } \examples{ +library(Biobase) data(eset_sub) pdata <- pData(eset_sub) diff --git a/man/fit_cyclical_many.Rd b/man/fit_cyclical_many.Rd index ccfed39..29bf243 100644 --- a/man/fit_cyclical_many.Rd +++ b/man/fit_cyclical_many.Rd @@ -45,6 +45,7 @@ quadratic trend filtering was applied to the concatenated data series of each gene. } \examples{ +library(Biobase) data(eset_sub) pdata <- pData(eset_sub) diff --git a/man/fit_loess.Rd b/man/fit_loess.Rd index 1eae23b..e008a54 100644 --- a/man/fit_loess.Rd +++ b/man/fit_loess.Rd @@ -21,6 +21,7 @@ estimated cyclic trend. Use loess to estimate cyclic trends of expression values } \examples{ +library(Biobase) data(eset_sub) pdata <- pData(eset_sub) diff --git a/man/fit_trendfilter_generic.Rd b/man/fit_trendfilter_generic.Rd index b0486f0..1efca51 100644 --- a/man/fit_trendfilter_generic.Rd +++ b/man/fit_trendfilter_generic.Rd @@ -43,6 +43,7 @@ quadratic trend filtering was applied to the concatenated data series of each gene. } \examples{ +library(Biobase) data(eset_sub) pdata <- pData(eset_sub) @@ -70,7 +71,6 @@ axis(1,at=c(0,pi/2, pi, 3*pi/2, 2*pi), expression(2*pi))) abline(h=0, lty=1, col="black", lwd=.7) - } \author{ Joyce Hsiao diff --git a/man/intensity2circle.Rd b/man/intensity2circle.Rd index a95d387..38d4304 100644 --- a/man/intensity2circle.Rd +++ b/man/intensity2circle.Rd @@ -30,6 +30,8 @@ cell cycle progression. } \examples{ # use our data +library(Biobase) + data(eset_sub) # FUCCI scores - log10 transformed sum of intensities that were From 09d17e29cbfe34bb15bfa02db6a514ff58332838 Mon Sep 17 00:00:00 2001 From: jhsiao999 Date: Thu, 12 Sep 2019 20:33:54 -0500 Subject: [PATCH 2/6] fix data.R roxygen --- R/data.R | 2 -- man/eset_sub.Rd | 16 ++++++++-------- man/fit_predict.Rd | 28 +++++++++++++++------------- man/fit_train.Rd | 5 +++-- 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/R/data.R b/R/data.R index b886549..0cebce1 100644 --- a/R/data.R +++ b/R/data.R @@ -36,7 +36,6 @@ #' @docType data #' #' @keywords data -#' "fit_train" #' @title Results of predicting cell cycle phase for samples from NA18511. @@ -72,5 +71,4 @@ #' @docType data #' #' @keywords data -#' "fit_predict" diff --git a/man/eset_sub.Rd b/man/eset_sub.Rd index ae17c21..9f67cc4 100644 --- a/man/eset_sub.Rd +++ b/man/eset_sub.Rd @@ -3,21 +3,21 @@ \docType{data} \name{eset_sub} \alias{eset_sub} -\title{Molecule counts of the 101 significant cyclical genes in 888 samples analyzed -in the study.} -\format{An ExpressionSet object with 888 samples and the 101 significant cyclic genes, +\title{Molecule counts of the 101 significant cyclical genes in the 888 samples +analyzed in the study.} +\format{An ExpressionSet object with 888 samples and the 101 significant + cyclic genes, \describe{ - \item{pData(eset_sub)$theta}{Inferred angles of each cell along + \item{theta}{Inferred angles of each cell along a circle, also known as FUCCI phase.} - \item{exprs(eset_sub)}{Molecule counts of the 101 significant cyclical genes.} }} \usage{ eset_sub } \description{ An ExpressionSet object (require Biobase package) including -molecule count data after gene and smaple filtering. The `phenotypeData()` slot -contains sample phenotype information and the `featureData()` slot contains -gene feature information. +molecule count data after gene and smaple filtering. The `phenotypeData()` +slot contains sample phenotype information and the `featureData()` slot +contains gene feature information. } \keyword{data} diff --git a/man/fit_predict.Rd b/man/fit_predict.Rd index d1972b8..0f08eff 100644 --- a/man/fit_predict.Rd +++ b/man/fit_predict.Rd @@ -6,31 +6,33 @@ \title{Results of predicting cell cycle phase for samples from NA18511.} \format{A list with the follwing elements \describe{ - \item{Y}{a data.frame (gene by sample) of quantile-normailzed gene expression values} - \item{cell_times_est}{a vector of estimated cell cycl phase values (range between 0 to 2pi)} + \item{Y}{a data.frame (gene by sample) of quantile-normailzed gene + expression values} + \item{cell_times_est}{a vector of estimated cell cycl phase values + (range between 0 to 2pi)} \item{loglik_est}{a vector of estimated log-likelihoods for each cell} - \item{Y_reordered}{Y ordered along columns by \code{cell_times-est}} + \item{Y_reordered}{Y ordered along columns by \emph{cell_times-est}} \item{cell_times_reordered}{cell_times_est ordered from 0 to 2pi} \item{funs_reordered}{A lists of estimated cyclic functions for each gene using re-ordered gene expression values (rows of Y_reordered) } - \item{mu_reordered}{A data.frame (gene by sample) of estimated expression for each gene - across cells in the test data using \code{funs_reordered}} - \item{sigma_reordered}{A vector of estimated standard error for each gene in the test data - across cells using \code{funs_reordered}} - \item{prob_per_cell_by_celltimes}{A data.frame (gene by sample) of log-likelihoods of - of gene expression values in the test data} + \item{mu_reordered}{A data.frame (gene by sample) of estimated expression + for each gene across cells in the test data using \emph{funs_reordered}} + \item{sigma_reordered}{A vector of estimated standard error for each gene + in the test data across cells using \emph{funs_reordered}} + \item{prob_per_cell_by_celltimes}{A data.frame (gene by sample) of + log-likelihoods of of gene expression values in the test data} } \describe{ - \item{\code{Y_reordered}}{Normalized expression values (log2CPM , )} - \item{\code{cell_times_reordered}}{Estimated phase} + \item{Y_reordered}{Quntile-normalized expression values} + \item{cell_times_reordered}{Estimated phase} }} \usage{ fit_predict } \description{ -Pre-computed results. Applied \code{cycle_npreg_outsample} and - results stored in \code{fit_train} to predict cell cycle phase for +Pre-computed results. Applied \emph{cycle_npreg_outsample} and + results stored in \emph{fit_train} to predict cell cycle phase for single-cell samples of NA19098. } \keyword{data} diff --git a/man/fit_train.Rd b/man/fit_train.Rd index 2d65427..18a8f2c 100644 --- a/man/fit_train.Rd +++ b/man/fit_train.Rd @@ -6,7 +6,8 @@ \title{Traing model results among samples from 5 individuals.} \format{A list with the follwing elements \describe{ - \item{Y}{a data.frame (gene by sample) of quantile-normailzed gene expression values} + \item{Y}{a data.frame (gene by sample) of quantile-normailzed gene + expression values} \item{theta}{a vector of cell cycl phase values (range between 0 to 2pi)} \item{sigma_est}{a vector of estimated standard errors} \item{funs_est}{a list of estimated cyclic functions} @@ -15,7 +16,7 @@ fit_train } \description{ -Pre-computed results. Applied \code{cycle_npreg_insample} to +Pre-computed results. Applied \emph{cycle_npreg_insample} to obtain gene-specific cyclic trend parameters using samples from 5 individuals } From afff0ec9c2b3447a37b0b4c4404b331550537e88 Mon Sep 17 00:00:00 2001 From: jhsiao999 Date: Thu, 12 Sep 2019 20:48:53 -0500 Subject: [PATCH 3/6] remove unused large files --- R/cycle_npreg.R | 2 ++ R/data_transform_quantile.R | 2 ++ 2 files changed, 4 insertions(+) diff --git a/R/cycle_npreg.R b/R/cycle_npreg.R index b57f148..8ab1c34 100644 --- a/R/cycle_npreg.R +++ b/R/cycle_npreg.R @@ -116,6 +116,7 @@ #' #' @author Joyce Hsiao #' +#' @import Biobase #' @export cycle_npreg_insample <- function(Y, theta, ncores=4, @@ -277,6 +278,7 @@ cycle_npreg_insample <- function(Y, theta, #' } #' title("Predicting cell cycle phase for NA18511", outer=TRUE) #' +#' @import Biobase #' @export cycle_npreg_outsample <- function(Y_test, sigma_est, diff --git a/R/data_transform_quantile.R b/R/data_transform_quantile.R index 7728b4a..83b3b61 100644 --- a/R/data_transform_quantile.R +++ b/R/data_transform_quantile.R @@ -26,6 +26,8 @@ #' ylab = "After quantile-normalization") #' #' @author Joyce Hsiao +#' +#' @import Biobase #' @export data_transform_quantile <- function(Y, ncores=2) { G <- nrow(Y) From 3d8d65474010643373a58ce5dca8c331ce008c33 Mon Sep 17 00:00:00 2001 From: jhsiao999 Date: Thu, 12 Sep 2019 20:57:42 -0500 Subject: [PATCH 4/6] more small fixes --- NAMESPACE | 2 ++ R/cycle_npreg.R | 32 +++++++++++++++++++++----------- R/data_transform_quantile.R | 1 + vignettes/vignette.Rmd | 2 +- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 89593ff..277b446 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,8 @@ export(fit_trendfilter_generic) export(intensity2circle) export(rotation) export(shift_origin) +import(Biobase) +import(methods) importFrom(assertthat,assert_that) importFrom(circular,coord2rad) importFrom(conicfit,AtoG) diff --git a/R/cycle_npreg.R b/R/cycle_npreg.R index 8ab1c34..7f3231e 100644 --- a/R/cycle_npreg.R +++ b/R/cycle_npreg.R @@ -46,7 +46,8 @@ #' data(eset_sub) #' #' # select top 5 cyclic genes -#' eset_top5 <- eset_sub[order(fData(eset_sub)$pve_fucci, decreasing=TRUE)[c(1:5)],] +#' eset_top5 <- eset_sub[ +#' order(fData(eset_sub)$pve_fucci, decreasing=TRUE)[c(1:5)],] #' #' # normalize molecule count for differencese in library sizes #' counts_normed <- t((10^6)*(t(exprs(eset_top5))/pData(eset_top5)$molecules)) @@ -56,7 +57,8 @@ #' pdata <- pData(eset_top5)[order(pData(eset_top5)$theta_shifted),] #' #' # quantile-transform each gene to normal distribution -#' expr_quant <- do.call(rbind, lapply(seq_len(nrow(counts_normed)), function(g) { +#' expr_quant <- do.call(rbind, +#' lapply(seq_len(nrow(counts_normed)), function(g) { #' yy <- counts_normed[g,] #' is.zero <- which(yy == 0) #' qq.map <- qqnorm(yy, plot.it = FALSE) @@ -72,7 +74,8 @@ #' which_samples_train <- rownames(pdata)[which(pdata$chip_id != "NA18511")] #' which_samples_predict <- rownames(pdata)[which(pdata$chip_id == "NA18511")] #' -#' # make an example of using data from 5 individuals to predict phase in one indivdual +#' # make an example of using data from 5 individuals to predict +#' # phase in one indivdual #' Y_train <- expr_quant[, which(colnames(expr_quant) %in% which_samples_train)] #' theta_train <- pdata$theta_shifted[which(rownames(pdata) %in% which_samples_train)] #' names(theta_train) <- rownames(pdata)[which(rownames(pdata) %in% which_samples_train)] @@ -84,7 +87,8 @@ #' ncores=2, #' method.trend="trendfilter") #' -#' Y_predict <- expr_quant[, which(colnames(expr_quant) %in% which_samples_predict)] +#' Y_predict <- expr_quant[, +#' which(colnames(expr_quant) %in% which_samples_predict)] #' #' theta_test <- pdata$theta[which(rownames(pdata) %in% which_samples_predict)] #' names(theta_test) <- rownames(pdata)[which(rownames(pdata) %in% which_samples_predict)] @@ -117,6 +121,7 @@ #' @author Joyce Hsiao #' #' @import Biobase +#' @import methods #' @export cycle_npreg_insample <- function(Y, theta, ncores=4, @@ -210,7 +215,8 @@ cycle_npreg_insample <- function(Y, theta, #' data(eset_sub) #' #' # select top 5 cyclic genes -#' eset_top5 <- eset_sub[order(fData(eset_sub)$pve_fucci, decreasing=TRUE)[c(1:5)],] +#' eset_top5 <- eset_sub[ +#' order(fData(eset_sub)$pve_fucci, decreasing=TRUE)[c(1:5)],] #' #' # normalize molecule count for differencese in library sizes #' counts_normed <- t((10^6)*(t(exprs(eset_top5))/pData(eset_top5)$molecules)) @@ -220,7 +226,8 @@ cycle_npreg_insample <- function(Y, theta, #' pdata <- pData(eset_top5)[order(pData(eset_top5)$theta_shifted),] #' #' # quantile-transform each gene to normal distribution -#' expr_quant <- do.call(rbind, lapply(seq_len(nrow(counts_normed)), function(g) { +#' expr_quant <- do.call(rbind, +#' lapply(seq_len(nrow(counts_normed)), function(g) { #' yy <- counts_normed[g,] #' is.zero <- which(yy == 0) #' qq.map <- qqnorm(yy, plot.it = FALSE) @@ -248,7 +255,8 @@ cycle_npreg_insample <- function(Y, theta, #' ncores=2, #' method.trend="trendfilter") #' -#' Y_predict <- expr_quant[, which(colnames(expr_quant) %in% which_samples_predict)] +#' Y_predict <- expr_quant[, +#' which(colnames(expr_quant) %in% which_samples_predict)] #' #' theta_test <- pdata$theta[which(rownames(pdata) %in% which_samples_predict)] #' names(theta_test) <- rownames(pdata)[which(rownames(pdata) %in% which_samples_predict)] @@ -279,6 +287,7 @@ cycle_npreg_insample <- function(Y, theta, #' title("Predicting cell cycle phase for NA18511", outer=TRUE) #' #' @import Biobase +#' @import methods #' @export cycle_npreg_outsample <- function(Y_test, sigma_est, @@ -481,10 +490,11 @@ cycle_npreg_loglik <- function(Y, sigma_est, funs_est, #' @param method.trend How to estimate cyclic trend of gene expression values? #' We offer three options: 'trendfilter' (\code{fit_trendfilter_generic()}), #' 'loess' (\code{fit_loess()}) and 'bsplines' (\code{fit_bspline()}). -#' 'trendfilter' provided the best fit in our study. But 'trendfilter` requires -#' cross-validation and take some time. Therefore, we recommend using bspline for -#' quick results. -#' @param ncores How many computing cores to use? We use mclapply function for parallel computing. +#' 'trendfilter' provided the best fit in our study. But 'trendfilter` +#' uses cross-validation and takes some time. Therefore, we recommend +#' using bspline for quick results. +#' @param ncores How many computing cores to use? We use mclapply function for +#' parallel computing. #' #' @inheritParams fit_trendfilter_generic #' @inheritParams fit_bspline diff --git a/R/data_transform_quantile.R b/R/data_transform_quantile.R index 83b3b61..083b9a8 100644 --- a/R/data_transform_quantile.R +++ b/R/data_transform_quantile.R @@ -28,6 +28,7 @@ #' @author Joyce Hsiao #' #' @import Biobase +#' @import methods #' @export data_transform_quantile <- function(Y, ncores=2) { G <- nrow(Y) diff --git a/vignettes/vignette.Rmd b/vignettes/vignette.Rmd index 9b34eeb..d594ac5 100644 --- a/vignettes/vignette.Rmd +++ b/vignettes/vignette.Rmd @@ -50,7 +50,7 @@ pdata <- pData(eset) fdata <- fData(eset) # select top 5 cyclic genes -eset_top5 <- eset_sub[order(fData(eset_sub)$pve_fucci, decreasing=T)[c(1:5)],] +eset_top5 <- eset_sub[order(fData(eset_sub)$pve_fucci, decreasing=TRUE)[c(1:5)],] # normalize molecule count for differencese in library sizes counts_normed <- t((10^6)*(t(exprs(eset_top5))/pData(eset_top5)$molecules)) From 1b584dc02a3374ef0f4bcb2bf16a2f1e85964742 Mon Sep 17 00:00:00 2001 From: jhsiao999 Date: Fri, 13 Sep 2019 00:09:44 -0500 Subject: [PATCH 5/6] even more fixes --- DESCRIPTION | 2 ++ NAMESPACE | 4 ++- R/cycle_npreg.R | 58 ++++++++++++++++++++++++++++++---- R/data.R | 6 ++++ R/data_transform_quantile.R | 33 ++++++++++++++++--- R/fit_cyclic_many.R | 36 ++++++++++++++++----- man/cycle_npreg_insample.Rd | 14 +++++--- man/cycle_npreg_mstep.Rd | 9 +++--- man/cycle_npreg_outsample.Rd | 11 ++++--- man/data_transform_quantile.Rd | 2 +- man/eset_sub.Rd | 2 +- man/fit_cyclical_many.Rd | 6 ++-- man/fit_predict.Rd | 2 +- man/fit_train.Rd | 2 +- 14 files changed, 148 insertions(+), 39 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 069f441..da6a04b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,6 +28,8 @@ Imports: Biobase, circular, conicfit, + doParallel, + foreach, genlasso, graphics, methods, diff --git a/NAMESPACE b/NAMESPACE index 277b446..f7203ce 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,7 +12,10 @@ export(intensity2circle) export(rotation) export(shift_origin) import(Biobase) +import(doParallel) +import(foreach) import(methods) +import(parallel) importFrom(assertthat,assert_that) importFrom(circular,coord2rad) importFrom(conicfit,AtoG) @@ -23,7 +26,6 @@ importFrom(genlasso,cv.trendfilter) importFrom(genlasso,trendfilter) importFrom(graphics,plot) importFrom(graphics,points) -importFrom(parallel,mclapply) importFrom(stats,approxfun) importFrom(stats,dnorm) importFrom(stats,loess) diff --git a/R/cycle_npreg.R b/R/cycle_npreg.R index 7f3231e..37ff8e4 100644 --- a/R/cycle_npreg.R +++ b/R/cycle_npreg.R @@ -17,7 +17,7 @@ #' #' @param theta A vector of angles. #' -#' @param ncores We use mclapply function for parallel computing. +#' @param ncores We use doParallel package for parallel computing. #' #' @param polyorder We estimate cyclic trends of gene expression #' levels using nonparamtric trend filtering. The default fits second @@ -122,6 +122,7 @@ #' #' @import Biobase #' @import methods +#' #' @export cycle_npreg_insample <- function(Y, theta, ncores=4, @@ -176,7 +177,7 @@ cycle_npreg_insample <- function(Y, theta, #' @param get_trend_estimates To re-estimate the cylic trend based on #' the predicted cell cycle phase or not (T or F). Default FALSE. This step #' calls trendfilter and is computationally intensive. -#' @param ncores We use mclapply function for parallel computing. +#' @param ncores We use doParallel package for parallel computing. #' #' @inheritParams cycle_npreg_mstep #' @inheritParams cycle_npreg_loglik @@ -493,7 +494,7 @@ cycle_npreg_loglik <- function(Y, sigma_est, funs_est, #' 'trendfilter' provided the best fit in our study. But 'trendfilter` #' uses cross-validation and takes some time. Therefore, we recommend #' using bspline for quick results. -#' @param ncores How many computing cores to use? We use mclapply function for +#' @param ncores How many computing cores to use? We use doParallel package for #' parallel computing. #' #' @inheritParams fit_trendfilter_generic @@ -511,14 +512,27 @@ cycle_npreg_loglik <- function(Y, sigma_est, funs_est, #' \item{funs}{Estimated cyclic functions} #' #' @importFrom assertthat assert_that -#' @importFrom parallel mclapply #' @importFrom stats dnorm approxfun +#' @import doParallel +#' @import parallel +#' @import foreach +#' #' @author Joyce Hsiao cycle_npreg_mstep <- function(Y, theta, method.trend=c("trendfilter", "loess", "bspline"), polyorder=2, ncores=4) { + if (is.null(ncores)) { + cl <- parallel::makeCluster(2) + doParallel::registerDoParallel(cl) + message(paste("computing on",ncores,"cores")) + } else { + cl <- parallel::makeCluster(ncores) + doParallel::registerDoParallel(cl) + message(paste("computing on",ncores,"cores")) + } + G <- nrow(Y) N <- ncol(Y) @@ -535,7 +549,38 @@ cycle_npreg_mstep <- function(Y, theta, method.trend=c("trendfilter", # for each gene, estimate the cyclical pattern of gene expression # conditioned on the given cell times - fit <- mclapply(seq_len(G), function(g) { + # fit <- mclapply(seq_len(G), function(g) { + # y_g <- Y_ordered[g,] + # + # if (method.trend=="trendfilter") { + # fit_g <- fit_trendfilter_generic(yy=y_g, polyorder = polyorder) + # fun_g <- approxfun(x=as.numeric(theta_ordered), + # y=as.numeric(fit_g$trend.yy), rule=2) + # mu_g <- fit_g$trend.yy + # } + # if (method.trend=="bspline") { + # fit_g <- fit_bspline(yy=y_g, time = theta_ordered) + # fun_g <- approxfun(x=as.numeric(theta_ordered), + # y=as.numeric(fit_g$pred.yy), rule=2) + # mu_g <- fit_g$pred.yy + # } + # + # if (method.trend=="loess") { + # fit_g <- fit_loess(yy=y_g, time = theta_ordered) + # fun_g <- approxfun(x=as.numeric(theta_ordered), + # y=as.numeric(fit_g$pred.yy), rule=2) + # mu_g <- fit_g$pred.yy + # } + # + # sigma_g <- sqrt(sum((y_g-mu_g)^2)/N) + # + # list(y_g =y_g, + # mu_g=mu_g, + # sigma_g=sigma_g, + # fun_g=fun_g) + # }, mc.cores = ncores) + + fit <- foreach::foreach(g=seq_len(G)) %dopar% { y_g <- Y_ordered[g,] if (method.trend=="trendfilter") { @@ -564,7 +609,8 @@ cycle_npreg_mstep <- function(Y, theta, method.trend=c("trendfilter", mu_g=mu_g, sigma_g=sigma_g, fun_g=fun_g) - }, mc.cores = ncores) + } + parallel::stopCluster(cl) sigma_est <- sapply(fit, "[[", "sigma_g") names(sigma_est) <- rownames(Y_ordered) diff --git a/R/data.R b/R/data.R index 0cebce1..5d108c0 100644 --- a/R/data.R +++ b/R/data.R @@ -15,6 +15,8 @@ #' #' @docType data #' +#' @usage data(eset_sub) +#' #' @keywords data "eset_sub" @@ -35,6 +37,8 @@ #' #' @docType data #' +#' @usage data(fit_train) +#' #' @keywords data "fit_train" @@ -70,5 +74,7 @@ #' #' @docType data #' +#' @usage data(fit_predit) +#' #' @keywords data "fit_predict" diff --git a/R/data_transform_quantile.R b/R/data_transform_quantile.R index 083b9a8..53e3de3 100644 --- a/R/data_transform_quantile.R +++ b/R/data_transform_quantile.R @@ -8,7 +8,7 @@ #' #' @param Y A gene by sample matrix. Contains library size-normalized #' molecule counts. -#' @param ncores We use mclapply function for parallel computing. +#' @param ncores We use doParallel package for parallel computing. #' #' @return A gene by sample expression matrix. #' @@ -29,20 +29,45 @@ #' #' @import Biobase #' @import methods +#' @import doParallel +#' @import parallel +#' @import foreach #' @export data_transform_quantile <- function(Y, ncores=2) { + + if (is.null(ncores)) { + cl <- parallel::makeCluster(2) + doParallel::registerDoParallel(cl) + message(paste("computing on",ncores,"cores")) + } else { + cl <- parallel::makeCluster(ncores) + doParallel::registerDoParallel(cl) + message(paste("computing on",ncores,"cores")) + } + G <- nrow(Y) - df <- mclapply(seq_len(G), function(g) { + # df <- mclapply(seq_len(G), function(g) { + # y_g <- Y[g,] + # is.zero <- which(y_g == 0) + # qq.map <- stats::qqnorm(y_g, plot.it=FALSE) + # yy.qq <- qq.map$x + # yy.qq[is.zero] <- sample(qq.map$x[is.zero]) + # return(y_g= yy.qq) + # }, mc.cores = ncores) + df <- foreach::foreach(g=seq_len(G)) %dopar% { y_g <- Y[g,] is.zero <- which(y_g == 0) - qq.map <- stats::qqnorm(y_g) + qq.map <- stats::qqnorm(y_g, plot.it=FALSE) yy.qq <- qq.map$x yy.qq[is.zero] <- sample(qq.map$x[is.zero]) return(y_g= yy.qq) - }, mc.cores = ncores) + } + parallel::stopCluster(cl) + df <- do.call(rbind, df) colnames(df) <- colnames(Y) + rownames(df) <- rownames(Y) return(df) } diff --git a/R/fit_cyclic_many.R b/R/fit_cyclic_many.R index 7e63149..c6393d6 100644 --- a/R/fit_cyclic_many.R +++ b/R/fit_cyclic_many.R @@ -32,7 +32,7 @@ #' levels using nonparamtric trend filtering. The default fits second #' degree polynomials. #' -#' @param ncores mclapply from the parallel package is used to perform +#' @param ncores doParallel package is used to perform #' parallel computing to reduce computational time. #' #' @return A vector of proportion of variance explained for each gene. @@ -54,12 +54,26 @@ #' theta_ordered <- theta[order(theta)] #' yy_ordered <- counts_quant[,match(names(theta_ordered), colnames(counts_quant))] #' -#' fit <- fit_cyclical_many(yy_ordered, theta=theta_ordered) +#' fit <- fit_cyclical_many(Y=yy_ordered, theta=theta_ordered) #' #' @author Joyce Hsiao #' +#' @import doParallel +#' @import foreach +#' @import parallel #' @export -fit_cyclical_many <- function(Y, theta, polyorder=2, ncores=4) { +fit_cyclical_many <- function(Y, theta, polyorder=2, ncores=2) { + + if (is.null(ncores)) { + cl <- parallel::makeCluster(2) + doParallel::registerDoParallel(cl) + message(paste("computing on",ncores,"cores")) + } else { + cl <- parallel::makeCluster(ncores) + doParallel::registerDoParallel(cl) + message(paste("computing on",ncores,"cores")) + } + G <- nrow(Y) N <- ncol(Y) @@ -76,11 +90,17 @@ fit_cyclical_many <- function(Y, theta, polyorder=2, ncores=4) { # For each gene, estimate the cyclical pattern of gene expression # conditioned on the given cell times. - fit <- mclapply(seq_len(G), function(g) { - y_g <- Y_ordered[g,] - fit_g <- fit_trendfilter_generic(yy=y_g, polyorder = polyorder) - return(fit_g$pve) - }, mc.cores = ncores) + fit <- foreach::foreach(g=seq_len(G)) %dopar% { + y_g <- Y_ordered[g,] + fit_g <- fit_trendfilter_generic(yy=y_g, polyorder = polyorder) + return(unlist(fit_g$pve)) + } + parallel::stopCluster(cl) + # fit <- mclapply(seq_len(G), function(g) { + # y_g <- Y_ordered[g,] + # fit_g <- fit_trendfilter_generic(yy=y_g, polyorder = polyorder) + # return(fit_g$pve) + # }, mc.cores = ncores) out <- do.call(rbind, fit) out <- as.data.frame(out) diff --git a/man/cycle_npreg_insample.Rd b/man/cycle_npreg_insample.Rd index 176cc39..fa51df4 100644 --- a/man/cycle_npreg_insample.Rd +++ b/man/cycle_npreg_insample.Rd @@ -13,7 +13,7 @@ values. Gene by sample.} \item{theta}{A vector of angles.} -\item{ncores}{We use mclapply function for parallel computing.} +\item{ncores}{We use doParallel package for parallel computing.} \item{polyorder}{We estimate cyclic trends of gene expression levels using nonparamtric trend filtering. The default fits second @@ -45,7 +45,8 @@ library(Biobase) data(eset_sub) # select top 5 cyclic genes -eset_top5 <- eset_sub[order(fData(eset_sub)$pve_fucci, decreasing=TRUE)[c(1:5)],] +eset_top5 <- eset_sub[ + order(fData(eset_sub)$pve_fucci, decreasing=TRUE)[c(1:5)],] # normalize molecule count for differencese in library sizes counts_normed <- t((10^6)*(t(exprs(eset_top5))/pData(eset_top5)$molecules)) @@ -55,7 +56,8 @@ counts_normed <- counts_normed[,order(pData(eset_top5)$theta_shifted)] pdata <- pData(eset_top5)[order(pData(eset_top5)$theta_shifted),] # quantile-transform each gene to normal distribution -expr_quant <- do.call(rbind, lapply(seq_len(nrow(counts_normed)), function(g) { +expr_quant <- do.call(rbind, + lapply(seq_len(nrow(counts_normed)), function(g) { yy <- counts_normed[g,] is.zero <- which(yy == 0) qq.map <- qqnorm(yy, plot.it = FALSE) @@ -71,7 +73,8 @@ colnames(expr_quant) <- colnames(counts_normed) which_samples_train <- rownames(pdata)[which(pdata$chip_id != "NA18511")] which_samples_predict <- rownames(pdata)[which(pdata$chip_id == "NA18511")] -# make an example of using data from 5 individuals to predict phase in one indivdual +# make an example of using data from 5 individuals to predict +# phase in one indivdual Y_train <- expr_quant[, which(colnames(expr_quant) \%in\% which_samples_train)] theta_train <- pdata$theta_shifted[which(rownames(pdata) \%in\% which_samples_train)] names(theta_train) <- rownames(pdata)[which(rownames(pdata) \%in\% which_samples_train)] @@ -83,7 +86,8 @@ fit_train <- cycle_npreg_insample(Y = Y_train, ncores=2, method.trend="trendfilter") -Y_predict <- expr_quant[, which(colnames(expr_quant) \%in\% which_samples_predict)] +Y_predict <- expr_quant[, + which(colnames(expr_quant) \%in\% which_samples_predict)] theta_test <- pdata$theta[which(rownames(pdata) \%in\% which_samples_predict)] names(theta_test) <- rownames(pdata)[which(rownames(pdata) \%in\% which_samples_predict)] diff --git a/man/cycle_npreg_mstep.Rd b/man/cycle_npreg_mstep.Rd index 75c3ce0..3b854dd 100644 --- a/man/cycle_npreg_mstep.Rd +++ b/man/cycle_npreg_mstep.Rd @@ -15,15 +15,16 @@ cycle_npreg_mstep(Y, theta, method.trend = c("trendfilter", "loess", \item{method.trend}{How to estimate cyclic trend of gene expression values? We offer three options: 'trendfilter' (\code{fit_trendfilter_generic()}), 'loess' (\code{fit_loess()}) and 'bsplines' (\code{fit_bspline()}). -'trendfilter' provided the best fit in our study. But 'trendfilter` requires -cross-validation and take some time. Therefore, we recommend using bspline for -quick results.} +'trendfilter' provided the best fit in our study. But 'trendfilter` +uses cross-validation and takes some time. Therefore, we recommend +using bspline for quick results.} \item{polyorder}{We estimate cyclic trends of gene expression levels using nonparamtric trend filtering. The default fits second degree polynomials.} -\item{ncores}{How many computing cores to use? We use mclapply function for parallel computing.} +\item{ncores}{How many computing cores to use? We use doParallel package for +parallel computing.} } \value{ A list with the following elements: diff --git a/man/cycle_npreg_outsample.Rd b/man/cycle_npreg_outsample.Rd index 32b1fb3..cbf4604 100644 --- a/man/cycle_npreg_outsample.Rd +++ b/man/cycle_npreg_outsample.Rd @@ -26,7 +26,7 @@ nonparamtric trend filtering. The default fits second degree polynomials.} \item{method.grid}{Method for defining bins along the circle.} -\item{ncores}{We use mclapply function for parallel computing.} +\item{ncores}{We use doParallel package for parallel computing.} \item{grids}{number of bins to be selected along 0 to 2pi.} @@ -74,7 +74,8 @@ library(Biobase) data(eset_sub) # select top 5 cyclic genes -eset_top5 <- eset_sub[order(fData(eset_sub)$pve_fucci, decreasing=TRUE)[c(1:5)],] +eset_top5 <- eset_sub[ + order(fData(eset_sub)$pve_fucci, decreasing=TRUE)[c(1:5)],] # normalize molecule count for differencese in library sizes counts_normed <- t((10^6)*(t(exprs(eset_top5))/pData(eset_top5)$molecules)) @@ -84,7 +85,8 @@ counts_normed <- counts_normed[,order(pData(eset_top5)$theta_shifted)] pdata <- pData(eset_top5)[order(pData(eset_top5)$theta_shifted),] # quantile-transform each gene to normal distribution -expr_quant <- do.call(rbind, lapply(seq_len(nrow(counts_normed)), function(g) { +expr_quant <- do.call(rbind, +lapply(seq_len(nrow(counts_normed)), function(g) { yy <- counts_normed[g,] is.zero <- which(yy == 0) qq.map <- qqnorm(yy, plot.it = FALSE) @@ -112,7 +114,8 @@ fit_train <- cycle_npreg_insample(Y = Y_train, ncores=2, method.trend="trendfilter") -Y_predict <- expr_quant[, which(colnames(expr_quant) \%in\% which_samples_predict)] +Y_predict <- expr_quant[, + which(colnames(expr_quant) \%in\% which_samples_predict)] theta_test <- pdata$theta[which(rownames(pdata) \%in\% which_samples_predict)] names(theta_test) <- rownames(pdata)[which(rownames(pdata) \%in\% which_samples_predict)] diff --git a/man/data_transform_quantile.Rd b/man/data_transform_quantile.Rd index d3c68f4..e65b95a 100644 --- a/man/data_transform_quantile.Rd +++ b/man/data_transform_quantile.Rd @@ -10,7 +10,7 @@ data_transform_quantile(Y, ncores = 2) \item{Y}{A gene by sample matrix. Contains library size-normalized molecule counts.} -\item{ncores}{We use mclapply function for parallel computing.} +\item{ncores}{We use doParallel package for parallel computing.} } \value{ A gene by sample expression matrix. diff --git a/man/eset_sub.Rd b/man/eset_sub.Rd index 9f67cc4..009347a 100644 --- a/man/eset_sub.Rd +++ b/man/eset_sub.Rd @@ -12,7 +12,7 @@ analyzed in the study.} a circle, also known as FUCCI phase.} }} \usage{ -eset_sub +data(eset_sub) } \description{ An ExpressionSet object (require Biobase package) including diff --git a/man/fit_cyclical_many.Rd b/man/fit_cyclical_many.Rd index 29bf243..e26ef64 100644 --- a/man/fit_cyclical_many.Rd +++ b/man/fit_cyclical_many.Rd @@ -5,7 +5,7 @@ \title{Compute proportation of variance explained by cyclic trends in the gene expression levels for each gene.} \usage{ -fit_cyclical_many(Y, theta, polyorder = 2, ncores = 4) +fit_cyclical_many(Y, theta, polyorder = 2, ncores = 2) } \arguments{ \item{Y}{A matrix (gene by sample) of gene expression values. The @@ -19,7 +19,7 @@ samples.} levels using nonparamtric trend filtering. The default fits second degree polynomials.} -\item{ncores}{mclapply from the parallel package is used to perform +\item{ncores}{doParallel package is used to perform parallel computing to reduce computational time.} } \value{ @@ -61,7 +61,7 @@ counts_quant <- data_transform_quantile(counts_normed, ncores=2) theta_ordered <- theta[order(theta)] yy_ordered <- counts_quant[,match(names(theta_ordered), colnames(counts_quant))] -fit <- fit_cyclical_many(yy_ordered, theta=theta_ordered) +fit <- fit_cyclical_many(Y=yy_ordered, theta=theta_ordered) } \author{ diff --git a/man/fit_predict.Rd b/man/fit_predict.Rd index 0f08eff..5cca3af 100644 --- a/man/fit_predict.Rd +++ b/man/fit_predict.Rd @@ -28,7 +28,7 @@ \item{cell_times_reordered}{Estimated phase} }} \usage{ -fit_predict +data(fit_predit) } \description{ Pre-computed results. Applied \emph{cycle_npreg_outsample} and diff --git a/man/fit_train.Rd b/man/fit_train.Rd index 18a8f2c..7541aaf 100644 --- a/man/fit_train.Rd +++ b/man/fit_train.Rd @@ -13,7 +13,7 @@ \item{funs_est}{a list of estimated cyclic functions} }} \usage{ -fit_train +data(fit_train) } \description{ Pre-computed results. Applied \emph{cycle_npreg_insample} to From 2e24eadeb9438ce13ccd11a1cae83038002cd1eb Mon Sep 17 00:00:00 2001 From: jhsiao999 Date: Fri, 13 Sep 2019 00:47:21 -0500 Subject: [PATCH 6/6] almost done? --- DESCRIPTION | 2 +- R/data.R | 2 +- man/fit_predict.Rd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index da6a04b..12c5980 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,7 +38,7 @@ Imports: Suggests: knitr, rmarkdown -Enchances: +Enhances: parallel Remotes: glmgen/genlasso diff --git a/R/data.R b/R/data.R index 5d108c0..572bcb7 100644 --- a/R/data.R +++ b/R/data.R @@ -74,7 +74,7 @@ #' #' @docType data #' -#' @usage data(fit_predit) +#' @usage data(fit_predict) #' #' @keywords data "fit_predict" diff --git a/man/fit_predict.Rd b/man/fit_predict.Rd index 5cca3af..5be163f 100644 --- a/man/fit_predict.Rd +++ b/man/fit_predict.Rd @@ -28,7 +28,7 @@ \item{cell_times_reordered}{Estimated phase} }} \usage{ -data(fit_predit) +data(fit_predict) } \description{ Pre-computed results. Applied \emph{cycle_npreg_outsample} and