Skip to content

Commit

Permalink
Option in scDissim method to not use the step function.
Browse files Browse the repository at this point in the history
  • Loading branch information
wvdbyl1 committed Dec 6, 2016
1 parent d884b0c commit 63fb614
Show file tree
Hide file tree
Showing 10 changed files with 269 additions and 8 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ LinkingTo: Rcpp, RcppParallel, RcppEigen
RoxygenNote: 5.0.1
NeedsCompilation: yes
SystemRequirements: GNU make
Suggests: testthat
29 changes: 24 additions & 5 deletions R/CIDR.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ setClass("scData", representation(tags="matrix",
priorTPM="numeric",
dThreshold="vector",
wThreshold="numeric",
pDropoutCoefA="numeric",
pDropoutCoefB="numeric",
dropoutCandidates="matrix",
PC="matrix",
variation="vector",
Expand Down Expand Up @@ -188,21 +190,29 @@ setGeneric("wThreshold", function(object, cutoff=0.5, plotTornado=FALSE){
#' @title Imputation Weighting Threshold
#'
#' @description
#' determines the imputation weighting threshold.
#' Determines the imputation weighting threshold.
#'
#' @rdname wThreshold
#' @name wThreshold
#'
#' @param object an scData class object.
#' @param cutoff parameter in the range (0,1), used in the calculation of imputation weighting threshold.
#' @param cutoff parameter in the range (0,1), used in the calculation of imputation weighting threshold. Default is 0.5.
#' @param plotTornado Boolean; if \code{TRUE}, the \emph{Tornado Plot} is produced.
#'
#' @details
#' This method finds a function P(u) that maps the average expression level of a gene to the
#' probability of a dropout occurring. It does this by fitting a negative logistic function
#' to the empirical dropouts vs average expression data. The imputation weighting threshold
#' is calculated as the value of u at which P(u) = \code{cutoff}.
#'
#' @importFrom minpack.lm nlsLM
#' @export
#'
#' @return an updated scData class object with the following attribute updated
#'
#' \item{wThreshold}{imputation weighting threshold.}
#' \item{pDropoutCoefA}{the steepness parameter of the negative logistic function that fits the data.}
#' \item{pDropoutCoefB}{the midpoint parameter of the negative logistic function that fits the data.}
#'
#' @examples
#' example(cidr)
Expand Down Expand Up @@ -238,10 +248,12 @@ setMethod("wThreshold", "scData", function(object, cutoff, plotTornado){
}

object@wThreshold <- threshold
object@pDropoutCoefA <- a
object@pDropoutCoefB <- b
return(object)
})

setGeneric("scDissim", function(object, correction=FALSE, threads=0) {
setGeneric("scDissim", function(object, correction=FALSE, threads=0, useStepFunction=TRUE) {
standardGeneric("scDissim")
})

Expand All @@ -256,6 +268,9 @@ setGeneric("scDissim", function(object, correction=FALSE, threads=0) {
#' @param object an scData class object.
#' @param correction Boolean; if \code{TRUE}, Cailliez correction is applied; by default \code{FALSE}.
#' @param threads integer; number of threads to be used; by default \code{0}, which uses all available threads.
#' @param useStepFunction Boolean; if \code{TRUE}, uses the step function as the imputation weighting function.
#' If \code{FALSE}, uses the negative logistic function that was fit by the \code{\link{wThreshold}} function.
#' Default is \code{TRUE}.
#'
#' @importFrom Rcpp evalCpp
#' @importFrom ade4 cailliez
Expand All @@ -268,7 +283,7 @@ setGeneric("scDissim", function(object, correction=FALSE, threads=0) {
#'
#' @examples
#' example(cidr)
setMethod("scDissim", "scData", function(object, correction, threads){
setMethod("scDissim", "scData", function(object, correction, threads, useStepFunction){
## the user can choose the number of threads
threads_int <- as.integer(threads)
if(!is.na(threads_int) && (threads_int > 0) && (threads_int < defaultNumThreads())) {
Expand All @@ -282,7 +297,11 @@ setMethod("scDissim", "scData", function(object, correction, threads){
}
N <- ncol(object@nData)
Dist <- array(0, dim=c(N, N))
D <- cpp_dist(Dist, object@dropoutCandidates, object@nData, N, object@wThreshold)
if (useStepFunction) {
D <- cpp_dist(Dist, object@dropoutCandidates, object@nData, N, object@wThreshold)
} else {
D <- cpp_dist_weighted(Dist, object@dropoutCandidates, object@nData, N, object@pDropoutCoefA, object@pDropoutCoefB)
}
D <- sqrt(D)
D <- D+t(D)
if(correction){
Expand Down
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ cpp_dist <- function(dist, truth, counts, ncol, threshold) {
.Call('cidr_cpp_dist', PACKAGE = 'cidr', dist, truth, counts, ncol, threshold)
}

cpp_dist_weighted <- function(dist, truth, counts, ncol, a, b) {
.Call('cidr_cpp_dist_weighted', PACKAGE = 'cidr', dist, truth, counts, ncol, a, b)
}

7 changes: 6 additions & 1 deletion man/scDissim.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 10 additions & 2 deletions man/wThreshold.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,19 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// cpp_dist_weighted
NumericMatrix cpp_dist_weighted(NumericMatrix dist, IntegerMatrix truth, NumericMatrix counts, int ncol, double a, double b);
RcppExport SEXP cidr_cpp_dist_weighted(SEXP distSEXP, SEXP truthSEXP, SEXP countsSEXP, SEXP ncolSEXP, SEXP aSEXP, SEXP bSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< NumericMatrix >::type dist(distSEXP);
Rcpp::traits::input_parameter< IntegerMatrix >::type truth(truthSEXP);
Rcpp::traits::input_parameter< NumericMatrix >::type counts(countsSEXP);
Rcpp::traits::input_parameter< int >::type ncol(ncolSEXP);
Rcpp::traits::input_parameter< double >::type a(aSEXP);
Rcpp::traits::input_parameter< double >::type b(bSEXP);
rcpp_result_gen = Rcpp::wrap(cpp_dist_weighted(dist, truth, counts, ncol, a, b));
return rcpp_result_gen;
END_RCPP
}
96 changes: 96 additions & 0 deletions src/scPCA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,99 @@ NumericMatrix cpp_dist(NumericMatrix dist, IntegerMatrix truth, NumericMatrix co

return dist;
}



struct ProbDropoutFunc {
double a;
double b;

inline double getProb(double x) {
return 1 / ( 1 + exp(a*(x - b)) );
}

inline double distance(double count1, int isDropoutCandidate1, double count2, int isDropoutCandidate2) {
if (isDropoutCandidate1 && isDropoutCandidate2) {
return 0;
} else {
if (isDropoutCandidate1 /*&& !isDropoutCandidate2*/) {
double p = getProb(count2);
count1 = p*count2 + (1 - p)*count1;
}
if (isDropoutCandidate2 /*&& !isDropoutCandidate1*/) {
double p = getProb(count1);
count2 = p*count1 + (1 - p)*count2;
}
double diff = count1 - count2;
return diff*diff;
}
}
};

// helper inline code to calculate individual dissimilarity entries - given
// particular count and truth "column" vectors
template <typename InputIterator1, typename InputIterator2, typename InputIterator3, typename InputIterator4>
inline double
weighted_accumulate(InputIterator1 count_col1_it,
InputIterator1 count_col1_end,
InputIterator2 count_col2_it,
InputIterator3 truth_col1_it,
InputIterator4 truth_col2_it,
ProbDropoutFunc func) {
double part_dist=0;
for (; count_col1_it != count_col1_end ; ++count_col1_it) {
part_dist += func.distance(*count_col1_it, *truth_col1_it, *count_col2_it, *truth_col2_it);
++count_col2_it;
++truth_col1_it;
++truth_col2_it;
}
return part_dist;
}

// worker object for use in parallel code - calculates dissimilarity matrix entries
// for a given begin/end range
struct WeightedCidrDistance : public Worker {

// Distance matrix - input & output
RMatrix<double> dist;

// truth matrix - input
const RMatrix<int> truth;

// Counts matrix - input
const RMatrix<double> counts;

const ProbDropoutFunc coefs;

// cidrDistance constructor - convert input matrices to RMatrix type
WeightedCidrDistance(NumericMatrix dist, const IntegerMatrix truth, const NumericMatrix counts, const ProbDropoutFunc coefs)
: dist(dist), truth(truth), counts(counts), coefs(coefs) {}

// TODO - only expecting begin & end - as parallelFor only works with begin/end
// maybe don't need all the others - just counts_begin & counts_end
void operator()(std::size_t begin, std::size_t end) {
for (std::size_t i = begin; i<end; ++i) {
// hold first columns in memory
RMatrix<double>::Column count_col_1 = counts.column(i);
RMatrix<int>::Column truth_col_1 = truth.column(i);
for (std::size_t j=0; j<i; ++j) {
RMatrix<double>::Column count_col_2 = counts.column(j);
RMatrix<int>::Column truth_col_2 = truth.column(j);
// this needs changing to new namenclature
dist(j,i) = weighted_accumulate(count_col_1.begin(), count_col_1.end(), count_col_2.begin(), truth_col_1.begin(), truth_col_2.begin(), coefs);
}
}
}
};

// [[Rcpp::export]]
NumericMatrix cpp_dist_weighted(NumericMatrix dist, IntegerMatrix truth, NumericMatrix counts, int ncol, double a, double b) {
// create worker object for parallel code
ProbDropoutFunc coefs = { a, b };
WeightedCidrDistance cidrDistance(dist, truth, counts, coefs);

// do the work in parallel
parallelFor(0, ncol, cidrDistance);

return dist;
}
4 changes: 4 additions & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
library(testthat)
library(cidr)

test_check("cidr")
55 changes: 55 additions & 0 deletions tests/testthat/test-cpp_dist.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
context("cpp_dist - C++ function to create dissimilarity matrix")

test_that("distance calculation works", {
N <- 2

# Both dropout candidates
dist <- array(0, dim=c(N, N))
dropouts <- array(1, dim=c(1, N))
data <- array(c(1.2, 0.9), dim=c(1,N))
threshold <- 5.0
cpp_dist(dist, dropouts, data, N, threshold)
expect_equal(dist, array(c(c(0, 0), c(0, 0)), dim=c(2,2)))

# Neither dropouts, same counts
dist <- array(0, dim=c(N, N))
dropouts <- array(0, dim=c(1, N))
data <- array(c(3.5, 3.5), dim=c(1,N))
threshold <- 5.0
cpp_dist(dist, dropouts, data, N, threshold)
expect_equal(dist, array(c(c(0, 0), c(0, 0)), dim=c(2,2)))

# Neither dropouts, different counts
dist <- array(0, dim=c(N, N))
dropouts <- array(0, dim=c(1, N))
data <- array(c(3.5, 3.6), dim=c(1,N))
threshold <- 5.0
cpp_dist(dist, dropouts, data, N, threshold)
expect_equal(dist, array(c(c(0, 0), c(0.01, 0)), dim=c(2,2)))

# #1 is dropout, #2 below threshold (imputation occurs)
dist <- array(0, dim=c(N, N))
dropouts <- array(c(1, 0), dim=c(1, N))
data <- array(c(1.2, 4.5), dim=c(1,N))
threshold <- 5.0
cpp_dist(dist, dropouts, data, N, threshold)
expect_equal(dist, array(c(c(0, 0), c(0, 0)), dim=c(2,2)))

# #1 is dropout, #2 above threshold
dist <- array(0, dim=c(N, N))
dropouts <- array(c(1, 0), dim=c(1, N))
data <- array(c(1.2, 5.2), dim=c(1,N))
threshold <- 5.0
cpp_dist(dist, dropouts, data, N, threshold)
expect_equal(dist, array(c(c(0, 0), c(16, 0)), dim=c(2,2)))

# 3 features
dist <- array(0, dim=c(N, N))
dropouts <- array(c(c(0, 0, 0),
c(1, 1, 0)), dim=c(3, N))
data <- array(c(c(4.2, 5.7, 4.5),
c(1.2, 0.7, 2.5)), dim=c(3,N))
threshold <- 5.0
cpp_dist(dist, dropouts, data, N, threshold)
expect_equal(dist, array(c(c(0, 0), c(29, 0)), dim=c(2,2)))
})
53 changes: 53 additions & 0 deletions tests/testthat/test-cpp_dist_weighted.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
context("cpp_dist_weighted - dissimilarity matrix using weighted imputation")

test_that("distance calculation works", {
N <- 2
a <- 1.5
b <- 5.0

# Both dropout candidates
dist <- array(0, dim=c(N, N))
dropouts <- array(1, dim=c(1, N))
data <- array(c(1.2, 0.9), dim=c(1,N))
cpp_dist_weighted(dist, dropouts, data, N, a, b)
expect_equal(dist, array(c(c(0, 0), c(0, 0)), dim=c(2,2)))

# Neither dropouts, same counts
dist <- array(0, dim=c(N, N))
dropouts <- array(0, dim=c(1, N))
data <- array(c(3.5, 3.5), dim=c(1,N))
cpp_dist_weighted(dist, dropouts, data, N, a, b)
expect_equal(dist, array(c(c(0, 0), c(0, 0)), dim=c(2,2)))

# Neither dropouts, different counts
dist <- array(0, dim=c(N, N))
dropouts <- array(0, dim=c(1, N))
data <- array(c(3.5, 3.6), dim=c(1,N))
cpp_dist_weighted(dist, dropouts, data, N, a, b)
expect_equal(dist, array(c(c(0, 0), c(0.01, 0)), dim=c(2,2)))

# #1 dropout, #2 not dropout (imputation occurs)
dist <- array(0, dim=c(N, N))
dropouts <- array(c(1, 0), dim=c(1, N))
data <- array(c(0.5, 5.0), dim=c(1,N))
# P(5.0) = 0.5, imputed value = 2.75
cpp_dist_weighted(dist, dropouts, data, N, a, b)
expect_equal(dist, array(c(c(0, 0), c(5.0625, 0)), dim=c(2,2)))

# #1 not dropout, #2 dropout (imputation occurs)
dist <- array(0, dim=c(N, N))
dropouts <- array(c(0, 1), dim=c(1, N))
data <- array(c(3.5, 0.5), dim=c(1,N))
# P(3.5) = 0.9046505351, imputed value = 3.2139516053
cpp_dist_weighted(dist, dropouts, data, N, a, b)
expect_equal(dist, array(c(c(0, 0), c(0.081823684110447, 0)), dim=c(2,2)))

# 3 features
dist <- array(0, dim=c(N, N))
dropouts <- array(c(c(0, 0, 0),
c(1, 1, 0)), dim=c(3, N))
data <- array(c(c(5.0, 5.0, 4.5),
c(0.5, 1.0, 2.5)), dim=c(3,N))
cpp_dist_weighted(dist, dropouts, data, N, a, b)
expect_equal(dist, array(c(c(0, 0), c(13.0625, 0)), dim=c(2,2)))
})

0 comments on commit 63fb614

Please sign in to comment.