Skip to content

Commit

Permalink
Merge pull request #16 from jhsiao999/dev
Browse files Browse the repository at this point in the history
Dev update
  • Loading branch information
jhsiao999 authored Sep 13, 2019
2 parents 1fb1c59 + 2e24ead commit cf247e6
Show file tree
Hide file tree
Showing 23 changed files with 287 additions and 161 deletions.
28 changes: 13 additions & 15 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,15 @@ Type: Package
Package: peco
Version: 0.99.0
Date: 2019-09-11
Title: A supervised approach for **P**r**e**dicting **c**ell cycle
pr**o**gression using scRNA-seq data
Authors@R: c(person("Chiaowen Joyce","Hsiao",email="joyce.hsiao1@gmail.com",
role=c("aut","cre"),
comment = c(ORCID = "0000-0001-8961-7522")),
person("Matthew","Stephens", email="stephens999@gmail.com",
role="aut", comment = c(ORCID = "0000-0001-5397-9257")),
person("John","Blischak", email = "jdblischak@gmail.com",
role="ctb", comment = c(ORCID = "0000-0003-2634-9879")),
person("Peter","Carbonetto", email = "peter.carbonetto@gmail.com",
role="ctb", comment = c(ORCID = "0000-0003-1144-6780")))
Title: A Supervised Approach for **P**r**e**dicting **c**ell Cycle
Pr**o**gression using scRNA-seq data
Authors@R: c(
person("Chiaowen Joyce","Hsiao", email="joyce.hsiao1@gmail.com",
role=c("aut","cre")),
person("Matthew", "Stephens", email="stephens999@gmail.com", role="aut"),
person("John", "Blischak", email = "jdblischak@gmail.com", role="ctb"),
person("Peter", "Carbonetto", email = "peter.carbonetto@gmail.com",
role="ctb"))
Maintainer: Chiaowen Joyce Hsiao <joyce.hsiao1@gmail.com>
Description: Our approach provides a way to assign continuous cell cycle phase
using scRNA-seq data, and consequently, allows to identify cyclic trend
Expand All @@ -30,17 +28,17 @@ Imports:
Biobase,
circular,
conicfit,
doParallel,
foreach,
genlasso,
ggplot2,
graphics,
MASS,
Matrix,
methods,
parallel,
stats
Suggests:
knitr,
rmarkdown
Enchances:
Enhances:
parallel
Remotes:
glmgen/genlasso
Expand Down
7 changes: 5 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,21 @@ export(fit_trendfilter_generic)
export(intensity2circle)
export(rotation)
export(shift_origin)
import(Biobase)
import(doParallel)
import(foreach)
import(methods)
import(parallel)
importFrom(assertthat,assert_that)
importFrom(circular,coord2rad)
importFrom(conicfit,AtoG)
importFrom(conicfit,EllipseDirectFit)
importFrom(conicfit,Residuals.ellipse)
importFrom(conicfit,calculateEllipse)
importFrom(genlasso,cv.trendfilter)
importFrom(genlasso,predict.genlasso)
importFrom(genlasso,trendfilter)
importFrom(graphics,plot)
importFrom(graphics,points)
importFrom(parallel,mclapply)
importFrom(stats,approxfun)
importFrom(stats,dnorm)
importFrom(stats,loess)
Expand Down
5 changes: 4 additions & 1 deletion R/circ_dist.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#' @name circ_dist
#'
#' @title Pairwise distance between two circular variables
#'
#' @description We define distance between two angles: the minimum of
Expand All @@ -24,11 +26,12 @@
#'
#' @author Joyce Hsiao, Matthew Stephens
#' @export
#'
circ_dist <- function(y1,y2) {
pmin(abs(y2-y1), abs(2*pi-(abs(y2-y1))))
}

#' @name rotation
#'
#' @title Rotate circular variable shift_var to minimize distance
#' between ref_var and shift_var
#'
Expand Down
120 changes: 86 additions & 34 deletions R/cycle_npreg.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
# to another gene expression dataset to infer an angle or cell cycle
# phase for each cell.

#' @name cycle_npreg_insample
#'
#' @title Obtain cyclic trend estimates from the training data
#'
#' @description Estimates cyclic trends of gene expression levels
Expand All @@ -15,7 +17,7 @@
#'
#' @param theta A vector of angles.
#'
#' @param ncores We use mclapply function for parallel computing.
#' @param ncores We use doParallel package for parallel computing.
#'
#' @param polyorder We estimate cyclic trends of gene expression
#' levels using nonparamtric trend filtering. The default fits second
Expand All @@ -39,11 +41,13 @@
#' trends of gene express levels for each gene.}
#'
#' @examples
#' library(Biobase)
#' # import data
#' data(eset_sub)
#'
#' # select top 5 cyclic genes
#' eset_top5 <- eset_sub[order(fData(eset_sub)$pve_fucci, decreasing=TRUE)[c(1:5)],]
#' eset_top5 <- eset_sub[
#' order(fData(eset_sub)$pve_fucci, decreasing=TRUE)[c(1:5)],]
#'
#' # normalize molecule count for differencese in library sizes
#' counts_normed <- t((10^6)*(t(exprs(eset_top5))/pData(eset_top5)$molecules))
Expand All @@ -53,7 +57,8 @@
#' pdata <- pData(eset_top5)[order(pData(eset_top5)$theta_shifted),]
#'
#' # quantile-transform each gene to normal distribution
#' expr_quant <- do.call(rbind, lapply(seq_len(nrow(counts_normed)), function(g) {
#' expr_quant <- do.call(rbind,
#' lapply(seq_len(nrow(counts_normed)), function(g) {
#' yy <- counts_normed[g,]
#' is.zero <- which(yy == 0)
#' qq.map <- qqnorm(yy, plot.it = FALSE)
Expand All @@ -69,7 +74,8 @@
#' which_samples_train <- rownames(pdata)[which(pdata$chip_id != "NA18511")]
#' which_samples_predict <- rownames(pdata)[which(pdata$chip_id == "NA18511")]
#'
#' # make an example of using data from 5 individuals to predict phase in one indivdual
#' # make an example of using data from 5 individuals to predict
#' # phase in one indivdual
#' Y_train <- expr_quant[, which(colnames(expr_quant) %in% which_samples_train)]
#' theta_train <- pdata$theta_shifted[which(rownames(pdata) %in% which_samples_train)]
#' names(theta_train) <- rownames(pdata)[which(rownames(pdata) %in% which_samples_train)]
Expand All @@ -81,7 +87,8 @@
#' ncores=2,
#' method.trend="trendfilter")
#'
#' Y_predict <- expr_quant[, which(colnames(expr_quant) %in% which_samples_predict)]
#' Y_predict <- expr_quant[,
#' which(colnames(expr_quant) %in% which_samples_predict)]
#'
#' theta_test <- pdata$theta[which(rownames(pdata) %in% which_samples_predict)]
#' names(theta_test) <- rownames(pdata)[which(rownames(pdata) %in% which_samples_predict)]
Expand Down Expand Up @@ -113,10 +120,10 @@
#'
#' @author Joyce Hsiao
#'
#' @export
#' @import Biobase
#' @import methods
#'
#' @import methods Biobase MASS Matrix ggplot2
NULL
#' @export
cycle_npreg_insample <- function(Y, theta,
ncores=4,
polyorder=2,
Expand All @@ -142,6 +149,8 @@ cycle_npreg_insample <- function(Y, theta,
funs_est=initial_mstep$funs))
}

#' @name cycle_npreg_outsample
#'
#' @title Predict test-sample ordering using training labels (no update)
#'
#' @description Apply the estimates of cycle_npreg_insample to another
Expand All @@ -168,7 +177,7 @@ cycle_npreg_insample <- function(Y, theta,
#' @param get_trend_estimates To re-estimate the cylic trend based on
#' the predicted cell cycle phase or not (T or F). Default FALSE. This step
#' calls trendfilter and is computationally intensive.
#' @param ncores We use mclapply function for parallel computing.
#' @param ncores We use doParallel package for parallel computing.
#'
#' @inheritParams cycle_npreg_mstep
#' @inheritParams cycle_npreg_loglik
Expand Down Expand Up @@ -207,7 +216,8 @@ cycle_npreg_insample <- function(Y, theta,
#' data(eset_sub)
#'
#' # select top 5 cyclic genes
#' eset_top5 <- eset_sub[order(fData(eset_sub)$pve_fucci, decreasing=TRUE)[c(1:5)],]
#' eset_top5 <- eset_sub[
#' order(fData(eset_sub)$pve_fucci, decreasing=TRUE)[c(1:5)],]
#'
#' # normalize molecule count for differencese in library sizes
#' counts_normed <- t((10^6)*(t(exprs(eset_top5))/pData(eset_top5)$molecules))
Expand All @@ -217,7 +227,8 @@ cycle_npreg_insample <- function(Y, theta,
#' pdata <- pData(eset_top5)[order(pData(eset_top5)$theta_shifted),]
#'
#' # quantile-transform each gene to normal distribution
#' expr_quant <- do.call(rbind, lapply(seq_len(nrow(counts_normed)), function(g) {
#' expr_quant <- do.call(rbind,
#' lapply(seq_len(nrow(counts_normed)), function(g) {
#' yy <- counts_normed[g,]
#' is.zero <- which(yy == 0)
#' qq.map <- qqnorm(yy, plot.it = FALSE)
Expand Down Expand Up @@ -245,7 +256,8 @@ cycle_npreg_insample <- function(Y, theta,
#' ncores=2,
#' method.trend="trendfilter")
#'
#' Y_predict <- expr_quant[, which(colnames(expr_quant) %in% which_samples_predict)]
#' Y_predict <- expr_quant[,
#' which(colnames(expr_quant) %in% which_samples_predict)]
#'
#' theta_test <- pdata$theta[which(rownames(pdata) %in% which_samples_predict)]
#' names(theta_test) <- rownames(pdata)[which(rownames(pdata) %in% which_samples_predict)]
Expand Down Expand Up @@ -275,10 +287,9 @@ cycle_npreg_insample <- function(Y, theta,
#' }
#' title("Predicting cell cycle phase for NA18511", outer=TRUE)
#'
#' @import Biobase
#' @import methods
#' @export
#'
#' @import methods Biobase MASS Matrix ggplot2
NULL
cycle_npreg_outsample <- function(Y_test,
sigma_est,
funs_est,
Expand Down Expand Up @@ -328,6 +339,8 @@ cycle_npreg_outsample <- function(Y_test,

#------ Supporting functions

#' @name initialize_grids
#'
#' @title For prediction, initialize grid points for cell cycle phase
#' on a circle.
#'
Expand All @@ -346,10 +359,6 @@ cycle_npreg_outsample <- function(Y_test,
#'
#' @author Joyce Hsiao
#'
#' @importFrom stats prcomp
#' @importFrom circular coord2rad
#' @import methods Biobase MASS Matrix ggplot2
NULL
initialize_grids <- function(Y, grids=100,
method.grid=c("pca", "uniform")) {

Expand Down Expand Up @@ -382,6 +391,8 @@ initialize_grids <- function(Y, grids=100,
return(theta_initial)
}

#' @name cycle_npreg_loglik
#'
#' @title Infer angles or cell cycle phase based on gene expression data
#'
#' @param Y Gene by sample expression matrix.
Expand All @@ -405,9 +416,6 @@ initialize_grids <- function(Y, grids=100,
#' to each bin.}
#'
#' @author Joyce Hsiao
#' @importFrom stats dnorm
#' @import methods Biobase MASS Matrix ggplot2
NULL
cycle_npreg_loglik <- function(Y, sigma_est, funs_est,
grids=100,
method.grid=c("pca", "uniform")) {
Expand Down Expand Up @@ -467,6 +475,8 @@ cycle_npreg_loglik <- function(Y, sigma_est, funs_est,
prob_per_cell_by_celltimes=prob_per_cell_by_celltimes))
}

#' @name cycle_npreg_mstep
#'
#' @title Estimate parameters of the cyclic trends
#'
#' @description This is used in both cycle_npreg_insample (training
Expand All @@ -481,10 +491,11 @@ cycle_npreg_loglik <- function(Y, sigma_est, funs_est,
#' @param method.trend How to estimate cyclic trend of gene expression values?
#' We offer three options: 'trendfilter' (\code{fit_trendfilter_generic()}),
#' 'loess' (\code{fit_loess()}) and 'bsplines' (\code{fit_bspline()}).
#' 'trendfilter' provided the best fit in our study. But 'trendfilter` requires
#' cross-validation and take some time. Therefore, we recommend using bspline for
#' quick results.
#' @param ncores How many computing cores to use? We use mclapply function for parallel computing.
#' 'trendfilter' provided the best fit in our study. But 'trendfilter`
#' uses cross-validation and takes some time. Therefore, we recommend
#' using bspline for quick results.
#' @param ncores How many computing cores to use? We use doParallel package for
#' parallel computing.
#'
#' @inheritParams fit_trendfilter_generic
#' @inheritParams fit_bspline
Expand All @@ -500,19 +511,28 @@ cycle_npreg_loglik <- function(Y, sigma_est, funs_est,
#' each gene}
#' \item{funs}{Estimated cyclic functions}
#'
#' @importFrom assertthat assert_that
#' @importFrom stats dnorm approxfun
#' @import doParallel
#' @import parallel
#' @import foreach
#'
#' @author Joyce Hsiao
#'
#' @importFrom assertthat assert_that
#' @importFrom parallel mclapply
#' @importFrom stats approxfun
#' @import methods Biobase MASS Matrix ggplot2
NULL
cycle_npreg_mstep <- function(Y, theta, method.trend=c("trendfilter",
"loess", "bspline"),
polyorder=2,
ncores=4) {

if (is.null(ncores)) {
cl <- parallel::makeCluster(2)
doParallel::registerDoParallel(cl)
message(paste("computing on",ncores,"cores"))
} else {
cl <- parallel::makeCluster(ncores)
doParallel::registerDoParallel(cl)
message(paste("computing on",ncores,"cores"))
}

G <- nrow(Y)
N <- ncol(Y)

Expand All @@ -529,7 +549,38 @@ cycle_npreg_mstep <- function(Y, theta, method.trend=c("trendfilter",

# for each gene, estimate the cyclical pattern of gene expression
# conditioned on the given cell times
fit <- mclapply(seq_len(G), function(g) {
# fit <- mclapply(seq_len(G), function(g) {
# y_g <- Y_ordered[g,]
#
# if (method.trend=="trendfilter") {
# fit_g <- fit_trendfilter_generic(yy=y_g, polyorder = polyorder)
# fun_g <- approxfun(x=as.numeric(theta_ordered),
# y=as.numeric(fit_g$trend.yy), rule=2)
# mu_g <- fit_g$trend.yy
# }
# if (method.trend=="bspline") {
# fit_g <- fit_bspline(yy=y_g, time = theta_ordered)
# fun_g <- approxfun(x=as.numeric(theta_ordered),
# y=as.numeric(fit_g$pred.yy), rule=2)
# mu_g <- fit_g$pred.yy
# }
#
# if (method.trend=="loess") {
# fit_g <- fit_loess(yy=y_g, time = theta_ordered)
# fun_g <- approxfun(x=as.numeric(theta_ordered),
# y=as.numeric(fit_g$pred.yy), rule=2)
# mu_g <- fit_g$pred.yy
# }
#
# sigma_g <- sqrt(sum((y_g-mu_g)^2)/N)
#
# list(y_g =y_g,
# mu_g=mu_g,
# sigma_g=sigma_g,
# fun_g=fun_g)
# }, mc.cores = ncores)

fit <- foreach::foreach(g=seq_len(G)) %dopar% {
y_g <- Y_ordered[g,]

if (method.trend=="trendfilter") {
Expand Down Expand Up @@ -558,7 +609,8 @@ cycle_npreg_mstep <- function(Y, theta, method.trend=c("trendfilter",
mu_g=mu_g,
sigma_g=sigma_g,
fun_g=fun_g)
}, mc.cores = ncores)
}
parallel::stopCluster(cl)

sigma_est <- sapply(fit, "[[", "sigma_g")
names(sigma_est) <- rownames(Y_ordered)
Expand Down
Loading

0 comments on commit cf247e6

Please sign in to comment.