diff --git a/CMakeLists.txt b/CMakeLists.txt
index 159969b9..d363dd51 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,7 +13,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "")
 set(CMAKE_VERBOSE_MAKEFILE TRUE)
 
 set(srclist ModularityOptimizer.cpp RModularityOptimizer.cpp RcppExports.cpp
-data_processing.cpp fast_wilcox.cpp feature_mat.cpp quantile_norm.cpp run_nmf.cpp snn.cpp)
+data_processing.cpp fast_wilcox.cpp feature_mat.cpp quantile_norm.cpp snn.cpp)
 list(TRANSFORM srclist PREPEND "${PROJECT_SOURCE_DIR}/src/")
 add_library(rliger2 SHARED ${srclist})
 set_target_properties(rliger2 PROPERTIES PREFIX "")
diff --git a/DESCRIPTION b/DESCRIPTION
index 5c25e57e..04fd260c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -27,7 +27,7 @@ Author: Joshua Welch [aut],
         Robert Lee [ctb],
         Yichen Wang [aut, cre],
         Andrew Robbins [ctb]
-Maintainer: Chao Gao <gchao@umich.edu>
+Maintainer: Yichen Wang <wayichen@umich.edu>
 BugReports: https://github.com/welch-lab/liger/issues
 URL: https://github.com/welch-lab/liger
 License: GPL-3 | file LICENSE
diff --git a/NAMESPACE b/NAMESPACE
index 539de6f3..5f648a7e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -11,9 +11,12 @@ S3method(as.ligerDataset,matrixLike)
 S3method(c,liger)
 S3method(cbind,ligerDataset)
 S3method(fortify,liger)
-S3method(runBPPINMF,Seurat)
-S3method(runBPPINMF,liger)
-S3method(runBPPINMF,list)
+S3method(runINMF,Seurat)
+S3method(runINMF,liger)
+S3method(runINMF,list)
+S3method(runOnlineINMF,Seurat)
+S3method(runOnlineINMF,liger)
+S3method(runOnlineINMF,list)
 S3method(runUINMF,Seurat)
 S3method(runUINMF,liger)
 S3method(runUINMF,list)
@@ -39,6 +42,7 @@ export(calcDatasetSpecificity)
 export(calcNormLoadings)
 export(calcPurity)
 export(cellMeta)
+export(closeAllH5)
 export(commandDiff)
 export(commands)
 export(convertOldLiger)
@@ -73,7 +77,6 @@ export(normPeak)
 export(normalize)
 export(normalizePeak)
 export(online_iNMF)
-export(optimizeALS)
 export(optimizeNewData)
 export(optimizeNewK)
 export(optimizeNewLambda)
@@ -117,14 +120,15 @@ export(removeMissing)
 export(restoreH5Liger)
 export(retrieveCellFeature)
 export(reverseMethData)
-export(runBPPINMF)
 export(runCluster)
 export(runDoubletFinder)
 export(runGOEnrich)
 export(runGSEA)
 export(runGeneralQC)
 export(runINMF)
+export(runINMF_R)
 export(runMarkerDEG)
+export(runOnlineINMF)
 export(runPairwiseDEG)
 export(runPseudoBulkDEG)
 export(runTSNE)
@@ -180,12 +184,11 @@ exportMethods(length)
 exportMethods(names)
 exportMethods(normData)
 exportMethods(normPeak)
-exportMethods(optimizeALS)
 exportMethods(quantileNorm)
 exportMethods(quantile_norm)
 exportMethods(rawData)
 exportMethods(rawPeak)
-exportMethods(runINMF)
+exportMethods(runINMF_R)
 exportMethods(scaleData)
 exportMethods(scaleUnsharedData)
 exportMethods(show)
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 22504899..fe87fba9 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -115,10 +115,6 @@ max_factor <- function(H, dims_use, center_cols = FALSE) {
     .Call(`_rliger2_max_factor`, H, dims_use, center_cols)
 }
 
-solveNNLS <- function(C, B) {
-    .Call(`_rliger2_solveNNLS`, C, B)
-}
-
 ComputeSNN <- function(nn_ranked, prune) {
     .Call(`_rliger2_ComputeSNN`, nn_ranked, prune)
 }
diff --git a/R/h5Utility.R b/R/h5Utility.R
index 9cb768e1..94eda029 100644
--- a/R/h5Utility.R
+++ b/R/h5Utility.R
@@ -195,11 +195,14 @@ restoreH5Liger <- function(object, filePath = NULL) {
         }
         h5.meta <- h5fileInfo(object)
         if (is.null(filePath)) filePath <- h5.meta$filename
+        if (is.null(filePath)) {
+            stop("No filename identified")
+        }
         if (!file.exists(filePath)) {
             stop("HDF5 file path does not exist:\n",
                  filePath)
         }
-        .log("Restoring ligerDataset from: ", filePath)
+        .log("filename identified: ", filePath)
         h5file <- hdf5r::H5File$new(filePath, mode = "r+")
         h5.meta$filename <- h5file$filename
         pathChecks <- unlist(lapply(h5.meta[4:10], function(x) {
@@ -228,16 +231,15 @@ restoreH5Liger <- function(object, filePath = NULL) {
         rawData(object, check = FALSE) <- h5file[[h5.meta$rawData]]
         if (!is.null(h5.meta$normData))
             normData(object, check = FALSE) <- h5file[[h5.meta$normData]]
-        if (!is.null(h5.meta$scaleData))
+        if (!is.null(h5.meta$scaleData)) {
             scaleData(object, check = FALSE) <- h5file[[h5.meta$scaleData]]
+        }
         methods::validObject(object)
     } else {
         # Working for liger object
         if (!is.null(filePath)) {
             if (!is.list(filePath) || is.null(names(filePath)))
                 stop("`filePath` has to be a named list for liger object.")
-            if (!any(names(filePath) %in% names(object)))
-                stop("names of `filePath` must be found in `names(object)`.")
         }
         for (d in names(object)) {
             if (isH5Liger(object, d)) {
@@ -249,7 +251,8 @@ restoreH5Liger <- function(object, filePath = NULL) {
                                 filePath[[d]])
                     else path <- filePath[[d]]
                 }
-                dataset(object, d, qc = FALSE) <-
+                .log("Restoring dataset \"", d, "\"")
+                datasets(object, check = FALSE)[[d]] <-
                     restoreH5Liger(dataset(object, d), filePath[[d]])
             }
         }
@@ -270,3 +273,39 @@ restoreH5Liger <- function(object, filePath = NULL) {
         list(folder = NULL, data = path)
     }
 }
+
+#' Close all links (to HDF5 files) of a liger object
+#' @description When need to interact with the data embedded in HDF5 files out
+#' of the currect R session, the HDF5 files has to be closed in order to be
+#' available to other processes.
+#' @param object liger object.
+#' @return \code{object} with links closed.
+#' @export
+closeAllH5 <- function(object) {
+    if (!isH5Liger(object)) return(object)
+    for (dn in names(object)) {
+        if (!isH5Liger(object, dn)) next
+        ld <- dataset(object, dn)
+        # if (!is.null(rawData(ld))) rawData(ld)$close()
+        # if (!is.null(normData(ld))) normData(ld)$close()
+        # if (!is.null(scaleData(ld))) scaleData(ld)$close()
+        # if (!is.null(scaleUnsharedData(ld))) scaleUnsharedData(ld)$close()
+        h5f <- getH5File(object, dn)
+        h5f$close_all()
+    }
+    return(object)
+}
+
+.H5GroupToH5SpMat <- function(obj, dims) {
+    groupPath <- obj$get_obj_name()
+    RcppPlanc::H5SpMat(filename = obj$get_filename(),
+                       valuePath = paste0(groupPath, "/data"),
+                       rowindPath = paste0(groupPath, "/indices"),
+                       colptrPath = paste0(groupPath, "/indptr"),
+                       nrow = dims[1], ncol = dims[2])
+}
+
+.H5DToH5Mat <- function(obj) {
+    RcppPlanc::H5Mat(filename = obj$get_filename(),
+                     dataPath = obj$get_obj_name())
+}
diff --git a/R/import.R b/R/import.R
index 4b138db8..05cd5974 100644
--- a/R/import.R
+++ b/R/import.R
@@ -324,21 +324,24 @@ readLiger <- function(
         clusterName = "clusters",
         h5FilePath = NULL,
         update = TRUE) {
-    oldObj <- readRDS(filename)
-    if (!inherits(oldObj, "liger"))
+    obj <- readRDS(filename)
+    if (!inherits(obj, "liger"))
         stop("Object is not of class \"liger\".")
-    oldVer <- oldObj@version
-    if (oldVer >= package_version("1.99.0")) return(oldObj)
-    .log("Older version (", oldVer, ") of liger object detected.")
+    ver <- obj@version
+    if (ver >= package_version("1.99.0")) {
+        if (isH5Liger(obj)) obj <- restoreH5Liger(obj)
+        return(obj)
+    }
+    .log("Older version (", ver, ") of liger object detected.")
     if (isTRUE(update)) {
         .log("Updating the object structure to make it compatible ",
              "with current version (", utils::packageVersion("rliger2"), ")")
-        return(convertOldLiger(oldObj, dimredName = dimredName,
+        return(convertOldLiger(obj, dimredName = dimredName,
                                clusterName = clusterName,
                                h5FilePath = h5FilePath))
     } else {
         .log("`update = FALSE` specified. Returning the original object.")
-        return(oldObj)
+        return(obj)
     }
 }
 
diff --git a/R/liger-class.R b/R/liger-class.R
index 355ff205..74247ab6 100644
--- a/R/liger-class.R
+++ b/R/liger-class.R
@@ -93,7 +93,6 @@ liger <- setClass(
         for (d in names(x)) {
             ld <- dataset(x, d)
             if (!is.null(ld@V)) {
-                print(all.equal(rownames(ld@V), varFeatures(x)))
                 if (!identical(rownames(ld@V), varFeatures(x)))
                     return(paste("Variable features do not match dimension",
                                  "of V matrix in dataset", d))
@@ -105,9 +104,18 @@ liger <- setClass(
                         return(paste("Variable features do not match dimension",
                                      "of scaleData in dataset", d))
                 } else {
-                    if (scaleData(ld)$dims[1] != length(varFeatures(x)))
-                        return(paste("Variable features do not match dimension",
-                                     "of scaleData in dataset (H5)", d))
+                    if (inherits(scaleData(ld), "H5D")) {
+                        if (scaleData(ld)$dims[1] != length(varFeatures(x)))
+                            return(paste("Variable features do not match ",
+                                         "dimension of scaleData in dataset ",
+                                         "(H5)", d))
+                    } else if (inherits(scaleData(ld), "H5Group")) {
+                        if (scaleData(ld)[["featureIdx"]]$dims != length(varFeatures(x))) {
+                            return(paste("Variable features do not match ",
+                                         "dimension of scaleData in dataset ",
+                                         "(H5)", d))
+                        }
+                    }
                 }
             }
         }
@@ -116,7 +124,7 @@ liger <- setClass(
 }
 
 .valid.liger <- function(object) {
-    message("Checking liger object validity")
+    # message("Checking liger object validity")
     res <- .checkLigerBarcodes(object)
     if (!is.null(res)) return(res)
     res <- .checkLigerVarFeature(object)
@@ -372,7 +380,7 @@ setGeneric("dataset", function(x, dataset = NULL) standardGeneric("dataset"))
 
 #' @export
 #' @rdname liger-class
-setGeneric("dataset<-", function(x, dataset, type = NULL, qc = TRUE, value) {
+setGeneric("dataset<-", function(x, dataset, type = NULL, qc = FALSE, value) {
     standardGeneric("dataset<-")
 })
 
@@ -404,6 +412,56 @@ setMethod("dataset", signature(x = "liger", dataset = "numeric"),
               datasets(x)[[dataset]]
           })
 
+.mergeCellMeta <- function(cm1, cm2) {
+    newDF <- S4Vectors::DataFrame(row.names = c(rownames(cm1), rownames(cm2)))
+    for (var in names(cm1)) {
+        value <- cm1[[var]]
+        if (var %in% names(cm2)) {
+            # TODO: check type
+            tryCatch(
+                expr = {
+                    if (is.null(dim(value))) {
+                        value <- c(value, cm2[[var]])
+                    } else {
+                        value <- rbind(value, cm2[[var]])
+                    }
+                },
+                finally = {
+                    cm2Idx <- seq(nrow(cm1) + 1, nrow(cm1) + nrow(cm2))
+                    if (is.null(dim(value))) value[cm2Idx] <- NA
+                    else {
+                        empty <- matrix(NA, nrow = nrow(cm2), ncol = ncol(value))
+                        value <- rbind(value, empty)
+                    }
+                }
+            )
+        } else {
+            cm2Idx <- seq(nrow(cm1) + 1, nrow(cm1) + nrow(cm2))
+            if (is.null(dim(value))) value[cm2Idx] <- NA
+            else {
+                empty <- matrix(NA, nrow = nrow(cm2), ncol = ncol(value))
+                value <- rbind(value, empty)
+            }
+        }
+        newDF[[var]] <- value
+    }
+    for (var in names(cm2)[!names(cm2) %in% names(cm1)]) {
+        value <- cm2[[var]]
+        if (is.null(dim(value))) {
+            if (is.factor(value)) {
+                value <- factor(c(rep(NA, nrow(cm1)), value),
+                                levels = levels(value))
+            } else {
+                value <- c(rep(NA, nrow(cm1)), value)
+            }
+        } else {
+            empty <- matrix(NA, nrow = nrow(cm1), ncol = ncol(value))
+            value <- rbind(empty, value)
+        }
+        newDF[[var]] <- value
+    }
+    return(newDF)
+}
 .expandDataFrame <- function(df, idx) {
     dfList <- as.list(df)
     dfList <- lapply(dfList, function(x, idx) {
@@ -428,19 +486,20 @@ setMethod("dataset", signature(x = "liger", dataset = "numeric"),
 setReplaceMethod("dataset", signature(x = "liger", dataset = "character",
                                       type = "missing", qc = "ANY",
                                       value = "ligerDataset"),
-                 function(x, dataset, type = NULL, qc = TRUE, value) {
+                 function(x, dataset, type = NULL, qc = FALSE, value) {
                      if (dataset %in% names(x)) {
                          dataset(x, dataset) <- NULL
                      }
                      new.idx <- seq(ncol(x) + 1, ncol(x) + ncol(value))
                      x@datasets[[dataset]] <- value
-                     # TODO also add rows to cellMeta and H.norm
                      cm <- x@cellMeta
                      remainingRowname <- rownames(cm)
                      cm <- .expandDataFrame(cm, new.idx)
                      rownames(cm) <- c(remainingRowname, colnames(value))
+
                      levels(cm$dataset) <- c(levels(cm$dataset), dataset)
                      cm$dataset[new.idx] <- dataset
+                     cm$barcode[new.idx] <- colnames(value)
                      x@cellMeta <- cm
                      # x@W is genes x k, no need to worry
                      if (!is.null(x@H.norm)) {
@@ -462,11 +521,12 @@ setReplaceMethod("dataset", signature(x = "liger", dataset = "character",
                                       value = "matrixLike"),
                  function(x, dataset,
                           type = c("rawData", "normData", "scaleData"),
-                          qc = TRUE,
+                          qc = FALSE,
                           value) {
                      type <- match.arg(type)
                      if (type == "rawData") {
                          ld <- createLigerDataset(rawData = value)
+                         colnames(ld) <- paste0(dataset, "_", colnames(ld))
                      } else if (type == "normData") {
                          ld <- createLigerDataset(normData = value)
                      } else if (type == "scaleData") {
diff --git a/R/ligerDataset-class.R b/R/ligerDataset-class.R
index 06e55430..c427326a 100644
--- a/R/ligerDataset-class.R
+++ b/R/ligerDataset-class.R
@@ -5,9 +5,10 @@ setClassUnion("matrixLike_OR_NULL", c("matrixLike", "NULL"))
 # It is quite hard to handle "H5D here, which is indeed defined as an R6 class.
 # I'm not sure if this is a proper solution
 setOldClass("H5D")
+setOldClass("H5Group")
 suppressWarnings(setClassUnion("dgCMatrix_OR_H5D_OR_NULL", c("dgCMatrix", "H5D", "NULL")))
 setClassUnion("matrix_OR_H5D_OR_NULL", c("matrix", "H5D", "NULL"))
-setClassUnion("matrixLike_OR_H5D_OR_NULL", c("matrixLike", "H5D", "NULL"))
+setClassUnion("matrixLike_OR_H5D_OR_H5Group_OR_NULL", c("matrixLike", "H5D", "H5Group", "NULL"))
 setClassUnion("index",
               members = c("logical", "numeric", "character"))
 
@@ -39,8 +40,8 @@ ligerDataset <- setClass(
     representation(
         rawData = "dgCMatrix_OR_H5D_OR_NULL",
         normData = "dgCMatrix_OR_H5D_OR_NULL",
-        scaleData = "matrixLike_OR_H5D_OR_NULL",
-        scaleUnsharedData = "matrixLike_OR_H5D_OR_NULL",
+        scaleData = "matrixLike_OR_H5D_OR_H5Group_OR_NULL",
+        scaleUnsharedData = "matrixLike_OR_H5D_OR_H5Group_OR_NULL",
         varUnsharedFeatures = "character",
         H = "matrix_OR_NULL",
         V = "matrix_OR_NULL",
@@ -182,10 +183,10 @@ modalOf <- function(object) {
 
 .valid.ligerDataset <- function(object) {
     if (isH5Liger(object)) {
-        message("Checking h5 ligerDataset validity")
+        # message("Checking h5 ligerDataset validity")
         .checkH5LigerDatasetLink(object)
     } else {
-        message("Checking in memory ligerDataset validity")
+        # message("Checking in memory ligerDataset validity")
         .checkLigerDatasetBarcodes(object)
     }
     # TODO more checks
@@ -276,8 +277,18 @@ setMethod(
                     cat(paste0(slot, ":"), nrow(data), "features\n")
                 }
                 if (inherits(data, "H5D")) {
-                    cat(paste0(slot, ":"), paste(data$dims, collapse = " x "),
-                        "values in H5D object\n")
+                    if (length(data$dims) == 1) {
+                        cat(paste0(slot, ":"), data$dims,
+                            "non-zero values in H5D object\n")
+                    } else {
+                        cat(paste0(slot, ":"),
+                            paste(data$dims, collapse = " x "),
+                            "values in H5D object\n")
+                    }
+                }
+                if (inherits(data, "H5Group")) {
+                    cat(paste0(slot, ":"), data[["data"]]$dims,
+                        "non-zero values in H5Group object\n")
                 }
             }
         }
@@ -583,6 +594,20 @@ setReplaceMethod(
     }
 )
 
+#' @export
+#' @rdname ligerDataset-class
+setReplaceMethod(
+    "scaleData",
+    signature(x = "ligerDataset", check = "ANY", value = "H5Group"),
+    function(x, check = TRUE, value) {
+        if (!isH5Liger(x))
+            stop("Cannot replace slot with on-disk data for in-memory object.")
+        x@scaleData <- value
+        if (isTRUE(check)) methods::validObject(x)
+        x
+    }
+)
+
 #' @export
 #' @rdname ligerDataset-class
 setGeneric(
@@ -650,6 +675,20 @@ setReplaceMethod(
     }
 )
 
+#' @export
+#' @rdname ligerDataset-class
+setReplaceMethod(
+    "scaleUnsharedData",
+    signature(x = "ligerDataset", check = "ANY", value = "H5Group"),
+    function(x, check = TRUE, value) {
+        if (!isH5Liger(x))
+            stop("Cannot replace slot with on-disk data for in-memory object.")
+        x@scaleUnsharedData <- value
+        if (isTRUE(check)) methods::validObject(x)
+        x
+    }
+)
+
 #' @export
 #' @rdname ligerDataset-class
 setGeneric(
diff --git a/R/optimizeUANLS.R b/R/optimizeUANLS.R
deleted file mode 100644
index b01d1631..00000000
--- a/R/optimizeUANLS.R
+++ /dev/null
@@ -1,289 +0,0 @@
-#' Perform iNMF on scaled datasets, and include unshared features
-#' @param object \linkS4class{liger} object. Should run
-#' \code{\link{selectGenes}} with \code{unshared = TRUE} and then run
-#' \code{\link{scaleNotCenter}} in advance.
-#' @param k Integer, inner dimension of factorization (number of factors).
-#' Default \code{30}.
-#' @param lambda Numeric, the lambda penalty. Default \code{5}.
-#' @param thresh Numeric, convergence threshold. Convergence occurs when
-#' \eqn{|obj_0-obj|/(mean(obj_0,obj)) < thresh}. Default \code{1e-10}.
-#' @param maxIter Maximum number of block coordinate descent iterations to
-#' perform. Default \code{30}.
-#' @param nrep Number of restarts to perform (iNMF objective function is
-#' non-convex, so taking the best objective from multiple successive
-#' initializations is recommended). For easier reproducibility, this increments
-#' the random seed by 1 for each consecutive restart, so future factorizations
-#' of the same dataset can be run with one rep if necessary. Default \code{1}.
-#' @param seed Random seed to allow reproducible results. Default \code{1}.
-#' @param verbose Logical. Whether to show information of the progress. Default
-#' \code{getOption("ligerVerbose")} which is \code{TRUE} if users have not set.
-#' @noRd
-optimizeUANLS <- function(
-        object,
-        k = 30,
-        lambda = 5,
-        maxIter = 30,
-        nrep = 1,
-        thresh = 1e-10,
-        seed = 1,
-        verbose = getOption("ligerVerbose")
-) {
-    set.seed(seed)
-    if (isTRUE(verbose))
-        .log('Performing Factorization using UINMF and unshared features')
-    if (length(lambda) == 1)
-        lambda <- rep(lambda, length(object))
-    else if (length(lambda) != length(object))
-        stop("Vectorized lambda should have one value for each dataset.")
-
-    # Get a list of all the matrices
-    # scaleData in mlist: gene x cell
-    mlist <- getMatrix(object, "scaleData", returnList = TRUE)
-    mlist <- lapply(mlist, as.matrix)
-    xdim <- lapply(mlist, dim)
-
-    # Return what datasets have unshared features,
-    # and the dimensions of those unshared features
-    ulist <- getMatrix(object, "scaleUnsharedData", returnList = TRUE)
-    ulist <- lapply(ulist, as.matrix)
-    udim <- lapply(ulist, dim)
-    unshared <- which(sapply(ulist, function(x) !is.null(x)))
-    max_feats <- max(unlist(lapply(ulist, nrow)))
-
-    # For every set of additional features less than the maximum,
-    # append an additional zero matrix s.t. it matches the maximum
-    for (i in seq_along(object)) {
-        if (i %in% unshared)
-            # X: (g+u) x c
-            mlist[[i]] <- rbind(mlist[[i]], ulist[[i]])
-    }
-
-    X <- mlist
-
-    # Create an 0 matrix the size of U for all U's, s.t. it can be stacked to W
-    zero_matrix_u_full <- c()
-    zero_matrix_u_partial <- c()
-    for (i in seq_along(object)) {
-        if (i %in% unshared) {
-            # u x c
-            zero_matrix_u_full[[i]] <-
-                matrix(0, nrow = udim[[i]][1], ncol = udim[[i]][2])
-            # u x k
-            zero_matrix_u_partial[[i]] <-
-                matrix(0, nrow = udim[[i]][1], ncol = k)
-        }
-    }
-
-    nCells <- sapply(X, ncol)
-    nGenes <- length(varFeatures(object))
-    bestObj <- Inf
-
-    for (i in seq(nrep)) {
-        current <- seed + i - 1
-        # initialization
-        idX <- list()
-
-        for (i in seq_along(X)) idX[[i]] <- sample(nCells[i], k)
-
-        # Establish V from only the RNA dimensions
-        # V matrices: g x k
-        V <- list()
-        for (i in seq_along(X))
-            V[[i]] <- as.matrix(scaleData(object, i)[, idX[[i]]])
-        # Establish W from the shared gene dimensions
-        # W matrices: g x k
-        W <- matrix(stats::runif(nGenes*k, 0, 2), nGenes, k)
-        H <- list()
-        # Initialize U
-        U <- list()
-        for (i in seq_along(X)) {
-            if (i %in% unshared) {
-                # u x k
-                U[[i]] <- ulist[[i]][, idX[[i]]]
-            }
-        }
-
-        iter <- 0
-        total_time <- 0
-        if (isTRUE(verbose))
-            pb <- utils::txtProgressBar(min = 0, max = maxIter, style = 3)
-        sqrtLambda <- lapply(lambda, sqrt)
-
-        # Initial Training Objects
-
-        obj_train_approximation <- 0
-        obj_train_penalty <- 0
-
-        for (i in seq_along(X)) {
-            # H: k x c
-            H[[i]] = matrix(stats::runif(k * nCells[i], 0, 2), k, nCells[i])
-            if (i %in% unshared) {
-                obj_train_approximation <- obj_train_approximation +
-                    norm(X[[i]] - (rbind(W, zero_matrix_u_partial[[i]]) +
-                                       rbind(V[[i]], U[[i]])) %*% H[[i]],
-                         "F") ^ 2
-                obj_train_penalty <- obj_train_penalty +
-                    lambda[[i]] * norm(rbind(V[[i]], U[[i]]) %*% H[[i]],
-                                       "F") ^ 2
-            } else {
-                obj_train_approximation <- obj_train_approximation +
-                    norm(X[[i]] - (W + V[[i]]) %*% H[[i]], "F") ^ 2
-                obj_train_penalty <- obj_train_penalty +
-                    lambda[[i]] * norm(V[[i]] %*% H[[i]], "F") ^ 2
-
-            }
-        }
-        obj_train <- obj_train_approximation + obj_train_penalty
-
-        #### Initialize Object Complete
-        #### Begin Updates
-        delta <- Inf
-        iter <- 1
-        while (delta > thresh & iter <= maxIter) {
-            iter_start_time <- Sys.time()
-
-            # H - Updates
-            # H: k x c
-            for (i in seq_along(X)) {
-                if (!(i %in% unshared)) {
-                    H[[i]] <- solveNNLS(
-                        rbind(W + V[[i]], sqrtLambda[[i]] * V[[i]]),
-                        rbind(X[[i]], matrix(0, nGenes, xdim[[i]][2]))
-                    )
-                } else {
-                    H[[i]] = solveNNLS(
-                        rbind(
-                            rbind(W, zero_matrix_u_partial[[i]]) +
-                                rbind(V[[i]], U[[i]]),
-                            sqrtLambda[[i]]*rbind(V[[i]], U[[i]])
-                        ),
-                        rbind(X[[i]],
-                              matrix(0, nGenes + udim[[i]][1], xdim[[i]][2]))
-                    )
-                }
-            }
-
-            # V - updates
-            for (i in seq_along(X)) {
-                V[[i]] = t(solveNNLS(
-                    rbind(t(H[[i]]), sqrtLambda[[i]] * t(H[[i]])),
-                    rbind(
-                        t(X[[i]][seq(nGenes), ] - W %*% H[[i]]),
-                        matrix(0, nCells[i], nGenes)
-                    )
-                ))
-            }
-
-            # U - updates
-            # U: u x k
-            for (i in seq_along(X)) {
-                if (i %in% unshared) {
-                    U[[i]] = t(solveNNLS(
-                        rbind(t(H[[i]]), sqrtLambda[[i]] * t(H[[i]])),
-                        rbind(
-                            t(X[[i]][seq(nGenes + 1, udim[[i]][1] + nGenes),]),
-                            t(zero_matrix_u_full[[i]])
-                        )
-                    ))
-                }
-            }
-
-
-            # W - updates
-            # H_t_stack: C x k
-            # ("C" for all cells, "c" for number of cells in each dataset)
-            H_t_stack <- t(Reduce(cbind, H))
-            # diff_stack_w: C x g
-            diff_stack_w = c()
-            for (i in seq_along(X)) {
-                diff_stack_w = cbind(
-                    diff_stack_w,
-                    X[[i]][seq(nGenes), ] - V[[i]] %*% H[[i]]
-                )
-            }
-            diff_stack_w <- t(diff_stack_w)
-            # W: g x k
-            W = t(solveNNLS(H_t_stack, diff_stack_w))
-
-            ## End of updates
-            iter_end_time <- Sys.time()
-            iter_time <- as.numeric(difftime(iter_end_time, iter_start_time,
-                                             units = "secs"))
-            total_time <- total_time + iter_time
-
-            #Updating training object
-            obj_train_prev <- obj_train
-            obj_train_approximation <- 0
-            obj_train_penalty <- 0
-
-            for (i in seq_along(X)) {
-                if (i %in% unshared) {
-                    obj_train_approximation <- obj_train_approximation +
-                        norm(
-                            X[[i]] - (rbind(W, zero_matrix_u_partial[[i]]) +
-                                          rbind(V[[i]], U[[i]])) %*% H[[i]],
-                            "F") ^ 2
-                    obj_train_penalty <- obj_train_penalty + lambda[[i]] *
-                        norm(rbind(V[[i]], U[[i]]) %*% H[[i]], "F") ^ 2
-                }
-                else {
-                    obj_train_approximation <- obj_train_approximation +
-                        norm(X[[i]] - (W + V[[i]]) %*% H[[i]], "F") ^ 2
-                    obj_train_penalty = obj_train_penalty + lambda[[i]] *
-                        norm(V[[i]] %*% H[[i]], "F") ^ 2
-                }
-            }
-
-            obj_train <- obj_train_approximation + obj_train_penalty
-            delta <- abs(obj_train_prev - obj_train) /
-                mean(c(obj_train_prev, obj_train))
-            iter <- iter + 1
-            if (isTRUE(verbose)) utils::setTxtProgressBar(pb = pb, value = iter)
-        }
-        if (isTRUE(verbose)) {
-            utils::setTxtProgressBar(pb = pb, value = maxIter)
-            cat("\n")
-            .log("Current seed: ", current, "; Current objective: ", obj_train)
-        }
-        if (obj_train < bestObj) {
-            W_m <- W
-            H_m <- H
-            V_m <- V
-            U_m <- U
-            bestObj <- obj_train
-            best_seed <- current
-        }
-    }
-
-    factorNames <- paste0("factor_", seq(k))
-
-    rownames(W_m) <- varFeatures(object)
-    colnames(W_m) <- factorNames
-    object@W <- W_m
-
-    for (i in seq_along(object)) {
-        ld <- dataset(object, i)
-
-        rownames(V_m[[i]]) <- varFeatures(object)
-        colnames(V_m[[i]]) <- factorNames
-        ld@V <- V_m[[i]]
-
-        rownames(H_m[[i]]) <- factorNames
-        colnames(H_m[[i]]) <- colnames(X[[i]])
-        ld@H <- H_m[[i]]
-
-        if (i %in% unshared) {
-            rownames(U_m[[i]]) <- ld@varUnsharedFeatures
-            colnames(U_m[[i]]) <- factorNames
-            ld@U <- U_m[[i]]
-        }
-        datasets(object, check = FALSE)[[i]] <- ld
-    }
-    param <- list(k = k, lambda = lambda)
-    object@uns$factorization <- param
-
-    if (isTRUE(verbose))
-        .log("Objective: ", bestObj, "\nBest results with seed: ", best_seed)
-
-    return(object)
-}
diff --git a/R/runBPPINMF.R b/R/runBPPINMF.R
deleted file mode 100644
index 87d1fe93..00000000
--- a/R/runBPPINMF.R
+++ /dev/null
@@ -1,311 +0,0 @@
-#' Perform iNMF on scaled datasets
-#' @description
-#' Performs integrative non-negative matrix factorization (iNMF) (J.D. Welch,
-#' 2019) to return factorized \eqn{H}, \eqn{W}, and \eqn{V} matrices. The
-#' objective function is stated as
-#'
-#' \deqn{\arg\min_{H\ge0,W\ge0,V\ge0}\sum_{i}^{d}||E_i-(W+V_i)Hi||^2_F+\lambda\sum_{i}^{d}||V_iH_i||_F^2}
-#'
-#' where \eqn{E_i} is the input non-negative matrix of the i'th dataset, \eqn{d}
-#' is the total number of datasets.
-#'
-#' The factorization produces a shared \eqn{W} matrix (genes by k), and for each
-#' dataset, an \eqn{H} matrix (k by cells) and a \eqn{V} matrix (genes by k).
-#' The \eqn{H} matrices represent the cell factor loadings. \eqn{W} is held
-#' consistent among all datasets, as it represents the shared components of the
-#' metagenes across datasets. The \eqn{V} matrices represent the
-#' dataset-specific components of the metagenes.
-#'
-#' This function adopts highly optimized fast and memory efficient
-#' implementation extended from Planc (Kannan, 2016). Pre-installation of
-#' extension package \code{RcppPlanc} is required. The underlying algorithm
-#' adopts the identical ANLS strategy as \code{\link{optimizeALS}} in the old
-#' version of LIGER.
-#' @param object A \linkS4class{liger} object, a Seurat object or a named list
-#' of matrix, dgCMatrix, H5D objects, where the names represents dataset names
-#' and matrices are scaled on the same set of variable features, with rows as
-#' features and columns as cells.
-#' @param k Inner dimension of factorization (number of factors). Run
-#' \code{\link{suggestK}} to determine appropriate value; a general rule of
-#' thumb is that a higher \code{k} will be needed for datasets with more
-#' sub-structure.
-#' @param lambda Regularization parameter. Larger values penalize
-#' dataset-specific effects more strongly (i.e. alignment should increase as
-#' \code{lambda} increases). Default \code{5}.
-#' @param nIteration Total number of block coordinate descent iterations to
-#' perform. Default \code{30}.
-#' @param nRandomStarts Number of restarts to perform (iNMF objective function
-#' is non-convex, so taking the best objective from multiple successive
-#' initialization is recommended). For easier reproducibility, this increments
-#' the random seed by 1 for each consecutive restart, so future factorization
-#' of the same dataset can be run with one rep if necessary. Default \code{1}.
-#' @param HInit Initial values to use for \eqn{H} matrices. A list object where
-#' each element is the initial \eqn{H} matrix of each dataset. Default
-#' \code{NULL}.
-#' @param WInit Initial values to use for \eqn{W} matrix. A matrix object.
-#' Default \code{NULL}.
-#' @param VInit Initial values to use for \eqn{V} matrices. A list object where
-#' each element is the initial \eqn{V} matrix of each dataset. Default
-#' \code{NULL}.
-#' @param seed Random seed to allow reproducible results. Default \code{1}.
-#' @param verbose Logical. Whether to show information of the progress. Default
-#' \code{getOption("ligerVerbose")} which is \code{TRUE} if users have not set.
-#' @param ... Arguments passed to methods.
-#' @rdname runBPPINMF
-#' @export
-#' @examples
-#' pbmc <- normalize(pbmc)
-#' pbmc <- selectGenes(pbmc)
-#' pbmc <- scaleNotCenter(pbmc)
-#' pbmc <- runBPPINMF(pbmc, k = 20)
-runBPPINMF <- function(
-        object,
-        k,
-        lambda = 5.0,
-        nIteration = 30,
-        nRandomStarts = 1,
-        HInit = NULL,
-        WInit = NULL,
-        VInit = NULL,
-        seed = 1,
-        verbose = getOption("ligerVerbose"),
-        ...
-) {
-    UseMethod("runBPPINMF", object)
-}
-
-#' @rdname runBPPINMF
-#' @export
-#' @param readH5 \code{TRUE} to force reading H5 based data into memory and
-#' conduct factorization. \code{"auto"} reads H5 dataset with less than 8000
-#' cells. \code{FALSE} will stop users from running if H5 data presents.
-#' @method runBPPINMF liger
-#' @return The liger method returns the input \linkS4class{liger} object with
-#' factorization result updated. A list of all \eqn{H} matrices can be accessed
-#' with \code{getMatrix(object, "H")}, a list of all \eqn{V} matrices can be
-#' accessed with \code{getMatrix(object, "V")}, and the \eqn{W} matrix can be
-#' accessed with \code{getMatrix(object, "W")}.
-runBPPINMF.liger <- function(
-        object,
-        k,
-        lambda = 5.0,
-        nIteration = 30,
-        nRandomStarts = 1,
-        HInit = NULL,
-        WInit = NULL,
-        VInit = NULL,
-        seed = 1,
-        verbose = getOption("ligerVerbose"),
-        readH5 = "auto",
-        ...
-) {
-    .checkObjVersion(object)
-    object <- recordCommand(object, dependencies = "RcppPlanc")
-    object <- removeMissing(object, orient = "cell", verbose = verbose)
-    data <- lapply(datasets(object), function(ld) {
-        if (is.null(scaleData(ld)))
-            stop("Scaled data not available. ",
-                 "Run `scaleNotCenter(object)` first")
-        return(scaleData(ld))
-    })
-    dataClasses <- sapply(data, function(x) class(x)[1])
-    if (!all(dataClasses == dataClasses[1])) {
-        stop("Currently the scaledData of all datasets have to be of the same class.")
-    }
-    out <- runBPPINMF.list(
-        object = data,
-        k = k,
-        lambda = lambda,
-        nIteration = nIteration,
-        nRandomStarts = nRandomStarts,
-        HInit = HInit,
-        WInit = WInit,
-        VInit = VInit,
-        seed = seed,
-        verbose = verbose,
-        barcodeList = lapply(datasets(object), colnames),
-        features = varFeatures(object)
-    )
-
-    object@W <- out$W
-    for (d in names(object)) {
-        ld <- dataset(object, d)
-        ld@H <- out$H[[d]]
-        ld@V <- out$V[[d]]
-        datasets(object, check = FALSE)[[d]] <- ld
-    }
-    object@uns$factorization$k <- k
-    object@uns$factorization$lambda <- lambda
-    return(object)
-}
-
-#' @rdname runBPPINMF
-#' @export
-#' @param barcodeList List object of barcodes for each datasets, for setting
-#' dimnames of output \eqn{H} matrices. Default \code{NULL} uses \code{colnames}
-#' of matrices in the \code{object}.
-#' @param features Character vector of feature names, for setting dimnames of
-#' output \eqn{V} and \eqn{W} matrices. Default \code{NULL} uses \code{rownames}
-#' of matrices in the \code{object}.
-#' @return The list method returns a list of entries \code{H}, \code{V} and
-#' \code{W}. \code{H} is a list of \eqn{H} matrices for each dataset. \code{V}
-#' is a list of \eqn{V} matrices for each dataset. \code{W} is the shared
-#' \eqn{W} matrix.
-#' @method runBPPINMF list
-runBPPINMF.list <- function(
-        object,
-        k,
-        lambda = 5.0,
-        nIteration = 30,
-        nRandomStarts = 1,
-        HInit = NULL,
-        WInit = NULL,
-        VInit = NULL,
-        seed = 1,
-        verbose = getOption("ligerVerbose"),
-        barcodeList = NULL,
-        features = NULL,
-        ...
-) {
-    if (!requireNamespace("RcppPlanc", quietly = TRUE))
-        stop("RcppPlanc installation required. Currently, please get the ",
-             "GitHub private repository access from the lab and run: \n",
-             "devtools::install_github(\"welch-lab/RcppPlanc\")")
-
-    if (is.null(barcodeList)) {
-        barcodeList <- lapply(object, colnames)
-    } else if (!identical(sapply(barcodeList, length), sapply(object, ncol))) {
-        stop("Given `barcodeList` cannot match to columns of all matrices.")
-    }
-
-    if (is.null(features)) features <- rownames(object[[1]])
-    else {
-        if (!identical(length(features), unique(sapply(object, nrow)))) {
-            stop("Either the length of `features` does not match with the ",
-                 "rows of all matrices, or the matrices have varied numbers ",
-                 "of rows.")
-        }
-    }
-
-    nCells <- sapply(object, ncol)
-    nGenes <- nrow(object[[1]])
-    nDatasets <- length(object)
-    datasetNames <- names(object)
-    if (k >= nGenes) {
-        stop("Select k lower than the number of variable genes: ", nGenes)
-    }
-    Wm <- Vm <- Hm <- NULL
-    bestObj <- Inf
-    bestSeed <- seed
-    for (i in seq(nRandomStarts)) {
-        if (isTRUE(verbose) && nRandomStarts > 1) {
-            .log("Replicate run ", i, "...")
-        }
-        set.seed(seed = seed + i - 1)
-        if (!is.null(WInit))
-            W <- t(.checkInit(WInit, nCells, nGenes, k, "W"))
-        else W <- t(matrix(stats::runif(nGenes * k, 0, 2), k, nGenes))
-        if (!is.null(VInit)) {
-            V <- .checkInit(VInit, nCells, nGenes, k, "V")
-            V <- lapply(V, t)
-        } else {
-            V <- lapply(seq(nDatasets), function(i) {
-                t(matrix(stats::runif(nGenes * k, 0, 2), k, nGenes))})
-        }
-        if (!is.null(HInit)) {
-            H <- .checkInit(HInit, nCells, nGenes, k, "H")
-            H <- lapply(H, t)
-        } else {
-            H <- lapply(nCells, function(n) {
-                matrix(stats::runif(n * k, 0, 2), n, k)
-            })
-        }
-        if (inherits(object[[1]], "H5D")) {
-            # RcppPlanc::bppinmf_h5dense()
-            stop("TODO: Push Yichen to test bppinmf_h5sparse/bppinmf_h5dense!")
-        } else {
-            out <- RcppPlanc::bppinmf(
-                objectList = object, k = k, lambda = lambda, niter = nIteration,
-                verbose = verbose)
-        }
-        if (out$objErr < bestObj) {
-            Wm <- out$W
-            Hm <- out$H
-            Vm <- out$V
-            bestObj <- out$objErr
-            bestSeed <- seed + i - 1
-        }
-    }
-    if (isTRUE(verbose) && nRandomStarts > 1) {
-        .log("Best objective error: ", bestObj, "\nBest seed: ", bestSeed)
-    }
-    factorNames <- paste0("Factor_", seq(k))
-    for (i in seq(nDatasets)) {
-        Hm[[i]] <- t(Hm[[i]])
-        colnames(Hm[[i]]) <- barcodeList[[i]]
-        rownames(Hm[[i]]) <- factorNames
-        rownames(Vm[[i]]) <- features
-        colnames(Vm[[i]]) <- factorNames
-    }
-    names(Vm) <- names(Hm) <- names(object)
-    rownames(Wm) <- features
-    colnames(Wm) <- factorNames
-    return(list(H = Hm, V = Vm, W = Wm))
-}
-
-#' @rdname runBPPINMF
-#' @export
-#' @param datasetVar Variable name in metadata indicating a factor of dataset
-#' belonging, or directly a factor that match with the number of cells.
-#' @return The Seurat method returns a list of entries \code{H}, \code{V} and
-#' \code{W}. \code{H} is a list of \eqn{H} matrices for each dataset. \code{V}
-#' is a list of \eqn{V} matrices for each dataset. \code{W} is the shared
-#' \eqn{W} matrix.
-#' @method runBPPINMF Seurat
-runBPPINMF.Seurat <- function(
-        object,
-        datasetVar,
-        k,
-        lambda = 5.0,
-        nIteration = 30,
-        nRandomStarts = 1,
-        HInit = NULL,
-        WInit = NULL,
-        VInit = NULL,
-        seed = 1,
-        verbose = getOption("ligerVerbose"),
-        ...
-) {
-    if (!requireNamespace("Seurat", quietly = TRUE)) {
-        stop("Seurat installation required. Please run\n",
-             "install.packages(\"Seurat\")")
-    }
-    EBind <- Seurat::GetAssayData(object, "scale.data")
-    if (any(EBind < 0)) {
-        stop("Non-negative Matrix Factorization requires non-negative data. ",
-             "Please scale the library-size-normalized data without centering.")
-    }
-    if (is.character(datasetVar) && length(datasetVar) == 1) {
-        datasetVar <- object[[datasetVar]][[1]]
-    }
-    if (!is.factor(datasetVar) || length(datasetVar) != ncol(EBind)) {
-        stop("Invalid `datasetVar`. Please see `?runBPPINMF` for instruction.")
-    }
-    datasetVar <- droplevels(datasetVar)
-    Es <- lapply(levels(datasetVar), function(d) {
-        as(EBind[, datasetVar == d], "CsparseMatrix")
-    })
-    names(Es) <- levels(datasetVar)
-    runBPPINMF.list(
-        object = Es,
-        k = k,
-        lambda = lambda,
-        nIteration = nIteration,
-        nRandomStarts = nRandomStarts,
-        HInit = HInit,
-        WInit = WInit,
-        VInit = VInit,
-        seed = seed,
-        verbose = verbose
-    )
-}
diff --git a/R/runINMF.R b/R/runINMF.R
index 5373e27c..d1e5b169 100644
--- a/R/runINMF.R
+++ b/R/runINMF.R
@@ -1,413 +1,272 @@
 #' Perform iNMF on scaled datasets
 #' @description
-#' Performs integrative non-negative matrix (iNMF) factorization to return
-#' factorized \eqn{H}, \eqn{W}, and \eqn{V} matrices. It optimizes the iNMF
-#' objective function using block coordinate descent (alternating non-negative
-#' least squares), where the number of factors is set by \code{k}. TODO: include
-#' objective function equation here in documentation (using deqn)
+#' Performs integrative non-negative matrix factorization (iNMF) (J.D. Welch,
+#' 2019) to return factorized \eqn{H}, \eqn{W}, and \eqn{V} matrices. The
+#' objective function is stated as
 #'
-#' For each dataset, this factorization produces an \eqn{H} matrix (cells by k),
-#' a \eqn{V} matrix (k by genes), and a shared \eqn{W} matrix (k by genes). The
-#' \eqn{H} matrices represent the cell factor loadings. \eqn{W} is held
+#' \deqn{\arg\min_{H\ge0,W\ge0,V\ge0}\sum_{i}^{d}||E_i-(W+V_i)Hi||^2_F+\lambda\sum_{i}^{d}||V_iH_i||_F^2}
+#'
+#' where \eqn{E_i} is the input non-negative matrix of the i'th dataset, \eqn{d}
+#' is the total number of datasets. \eqn{E_i} is of size \eqn{m \times n_i} for
+#' \eqn{m} variable genes and \eqn{n_i} cells, \eqn{H_i} is of size
+#' \eqn{n_i \times k}, \eqn{V_i} is of size \eqn{m \times k}, and \eqn{W} is of
+#' size \eqn{m \times k}.
+#'
+#' The factorization produces a shared \eqn{W} matrix (genes by k), and for each
+#' dataset, an \eqn{H} matrix (k by cells) and a \eqn{V} matrix (genes by k).
+#' The \eqn{H} matrices represent the cell factor loadings. \eqn{W} is held
 #' consistent among all datasets, as it represents the shared components of the
 #' metagenes across datasets. The \eqn{V} matrices represent the
 #' dataset-specific components of the metagenes.
-#' @param object A \linkS4class{liger} object or a named list of matrix object,
-#' where the names represents dataset names and matrices are scaled on the same
-#' set of variable features, with rows as features and columns as cells.
+#'
+#' This function adopts highly optimized fast and memory efficient
+#' implementation extended from Planc (Kannan, 2016). Pre-installation of
+#' extension package \code{RcppPlanc} is required. The underlying algorithm
+#' adopts the identical ANLS strategy as \code{\link{optimizeALS}} in the old
+#' version of LIGER.
+#' @param object A \linkS4class{liger} object, a Seurat object or a named list
+#' of matrix, dgCMatrix, H5D objects, where the names represents dataset names
+#' and matrices are scaled on the same set of variable features, with rows as
+#' features and columns as cells.
 #' @param k Inner dimension of factorization (number of factors). Run
 #' \code{\link{suggestK}} to determine appropriate value; a general rule of
 #' thumb is that a higher \code{k} will be needed for datasets with more
-#' sub-structure.
+#' sub-structure. Default \code{20}.
 #' @param lambda Regularization parameter. Larger values penalize
 #' dataset-specific effects more strongly (i.e. alignment should increase as
 #' \code{lambda} increases). Default \code{5}.
-#' @param thresh Convergence threshold. Convergence occurs when
-#' \eqn{|obj_0-obj|/(mean(obj_0,obj)) < thresh}. Default \code{1e-6}.
-#' @param maxIter Maximum number of block coordinate descent iterations to
+#' @param nIteration Total number of block coordinate descent iterations to
 #' perform. Default \code{30}.
-#' @param nrep Number of restarts to perform (iNMF objective function is
-#' non-convex, so taking the best objective from multiple successive
+#' @param nRandomStarts Number of restarts to perform (iNMF objective function
+#' is non-convex, so taking the best objective from multiple successive
 #' initialization is recommended). For easier reproducibility, this increments
 #' the random seed by 1 for each consecutive restart, so future factorization
 #' of the same dataset can be run with one rep if necessary. Default \code{1}.
-#' @param H.init Initial values to use for \eqn{H} matrices. A list object where
+#' @param HInit Initial values to use for \eqn{H} matrices. A list object where
 #' each element is the initial \eqn{H} matrix of each dataset. Default
 #' \code{NULL}.
-#' @param W.init Initial values to use for \eqn{W} matrix. A matrix object.
+#' @param WInit Initial values to use for \eqn{W} matrix. A matrix object.
 #' Default \code{NULL}.
-#' @param V.init Initial values to use for \eqn{V} matrices. A list object where
+#' @param VInit Initial values to use for \eqn{V} matrices. A list object where
 #' each element is the initial \eqn{V} matrix of each dataset. Default
 #' \code{NULL}.
-#' @param method NNLS subproblem solver. Choose from \code{"liger"} (default
-#' original implementation), \code{"planc"} or \code{"rcppml"}.
-#' @param useUnshared Logical, whether to include unshared variable features and
-#' run optimizeUANLS algorithm. Defaul \code{FALSE}. Running
-#' \code{\link{selectGenes}} with \code{unshared = TRUE} and then running
-#' \code{\link{scaleNotCenter}} is required.
 #' @param seed Random seed to allow reproducible results. Default \code{1}.
-#' @param readH5 \code{TRUE} to force reading H5 based data into memory and
-#' conduct factorization. \code{"auto"} reads H5 dataset with less than 8000
-#' cells. \code{FALSE} will stop users from running if H5 data presents.
 #' @param verbose Logical. Whether to show information of the progress. Default
 #' \code{getOption("ligerVerbose")} which is \code{TRUE} if users have not set.
-#' @param max.iters,use.unshared,rand.seed \bold{Deprecated}. See Usage section
-#' for replacement.
-#' @param print.obj \bold{Defunct}. Whether to print objective function values
-#' after convergence when \code{verbose = TRUE}. Now always print when verbose.
-#' @return \code{object} with \code{W} slot updated with the result \eqn{W}
-#' matrix, and the \code{H} and \code{V} slots of each
-#' \linkS4class{ligerDataset} object in the \code{datasets} slot updated with
-#' the dataset specific \eqn{H} and \eqn{V} matrix, respectively.
+#' @param ... Arguments passed to methods.
 #' @rdname runINMF
 #' @export
 #' @examples
 #' pbmc <- normalize(pbmc)
 #' pbmc <- selectGenes(pbmc)
 #' pbmc <- scaleNotCenter(pbmc)
-#' # Only running a few iterations for fast examples
-#' pbmc <- runINMF(pbmc, k = 20, maxIter = 2)
-setGeneric(
-    "runINMF",
-    function(
+#' pbmc <- runINMF(pbmc, k = 20)
+runINMF <- function(
         object,
-        k,
+        k = 20,
         lambda = 5.0,
-        thresh = 1e-6,
-        maxIter = 30,
-        nrep = 1,
-        H.init = NULL,
-        W.init = NULL,
-        V.init = NULL,
-        method = c("planc", "liger", "rcppml"),
-        useUnshared = FALSE,
+        nIteration = 30,
+        nRandomStarts = 1,
+        HInit = NULL,
+        WInit = NULL,
+        VInit = NULL,
         seed = 1,
-        readH5 = "auto",
         verbose = getOption("ligerVerbose"),
-        # Deprecated coding style
-        max.iters = maxIter,
-        use.unshared = useUnshared,
-        rand.seed = seed,
-        # Deprecated functionality
-        print.obj = NULL
-    ) standardGeneric("runINMF")
-)
+        ...
+) {
+    UseMethod("runINMF", object)
+}
 
 #' @rdname runINMF
 #' @export
-setMethod(
-    "runINMF",
-    signature(object = "liger"),
-    function(
+#' @param readH5 \code{TRUE} to force reading H5 based data into memory and
+#' conduct factorization. \code{"auto"} reads H5 dataset with less than 8000
+#' cells. \code{FALSE} will stop users from running if H5 data presents.
+#' @method runINMF liger
+#' @return The liger method returns the input \linkS4class{liger} object with
+#' factorization result updated. A list of all \eqn{H} matrices can be accessed
+#' with \code{getMatrix(object, "H")}, a list of all \eqn{V} matrices can be
+#' accessed with \code{getMatrix(object, "V")}, and the \eqn{W} matrix can be
+#' accessed with \code{getMatrix(object, "W")}.
+runINMF.liger <- function(
         object,
-        k,
+        k = 20,
         lambda = 5.0,
-        thresh = 1e-6,
-        maxIter = 30,
-        nrep = 1,
-        H.init = NULL,
-        W.init = NULL,
-        V.init = NULL,
-        method = c("planc", "liger", "rcppml"),
-        useUnshared = FALSE,
+        nIteration = 30,
+        nRandomStarts = 1,
+        HInit = NULL,
+        WInit = NULL,
+        VInit = NULL,
         seed = 1,
-        readH5 = "auto",
         verbose = getOption("ligerVerbose"),
-        # Deprecated coding style
-        max.iters = maxIter,
-        use.unshared = useUnshared,
-        rand.seed = seed,
-        # Deprecated functionality
-        print.obj = NULL
-    ) {
-        .deprecateArgs(list(max.iters = "maxIter", use.unshared = "useUnshared",
-                            rand.seed = "seed"), defunct = "print.obj")
-        .checkObjVersion(object)
-        method <- match.arg(method)
-        object <- recordCommand(object)
-        if (isFALSE(useUnshared)) {
-            object <- removeMissing(object, orient = "cell",
-                                    verbose = verbose)
-            data <- lapply(datasets(object), function(ld) {
-                if (is.null(scaleData(ld)))
-                    stop("Scaled data not available. ",
-                         "Run `scaleNotCenter(object)` first")
-                if (isH5Liger(ld)) {
-                    if (!isFALSE(readH5)) {
-                        h5d <- scaleData(ld)
-                        if (readH5 == "auto") {
-                            if (h5d$dims[2] <= 8000) {
-                                warning("Automatically reading H5 based ",
-                                        "scaled dense matrix into memory. ",
-                                        "Dim: ", h5d$dims[1], "x", h5d$dims[2],
-                                        immediate. = verbose)
-                                return(h5d[,])
-                            } else {
-                                stop("Scaled data in H5 based dataset with ",
-                                     "more than 8000 cells will not be ",
-                                     "automatically read into memory. Use ",
-                                     "`readH5 = TRUE` to force reading, or ",
-                                     "try `online_iNMF()` instead.")
-                            }
-                        } else if (isTRUE(readH5)) {
-                            return(h5d[,])
-                        } else {
-                            stop("Can only set `readH5` to TRUE, FALSE, ",
-                                 "or 'auto'.")
-                        }
-                    } else {
-                        stop("H5 based dataset detected while `readH5` is ",
-                             "set to FALSE.")
-                    }
-                } else {
-                    return(scaleData(ld))
-                }
-            })
-            out <- runINMF(
-                object = data,
-                k = k,
-                lambda = lambda,
-                thresh = thresh,
-                maxIter = maxIter,
-                nrep = nrep,
-                H.init = H.init,
-                W.init = W.init,
-                V.init = V.init,
-                method = method,
-                useUnshared = FALSE,
-                seed = seed,
-                verbose = verbose
-            )
-            # return(out)
-            object@W <- out$W
-            rownames(object@W) <- varFeatures(object)
-            for (d in names(object)) {
-                ld <- dataset(object, d)
-                ld@H <- out$H[[d]]
-                colnames(ld@H) <- colnames(ld)
-                ld@V <- out$V[[d]]
-                rownames(ld@V) <- varFeatures(object)
-                datasets(object, check = FALSE)[[d]] <- ld
-            }
-            object@uns$factorization$k <- k
-            object@uns$factorization$lambda <- lambda
-        } else {
-            object <- optimizeUANLS(
-                object = object,
-                k = k,
-                lambda = lambda,
-                thresh = thresh,
-                maxIter = maxIter,
-                nrep = nrep,
-                seed = seed,
-                verbose = verbose
-            )
-        }
-        return(object)
+        readH5 = "auto",
+        ...
+) {
+    .checkObjVersion(object)
+    object <- recordCommand(object, dependencies = "RcppPlanc")
+    object <- removeMissing(object, orient = "cell", verbose = verbose)
+    data <- lapply(datasets(object), function(ld) {
+        if (is.null(scaleData(ld)))
+            stop("Scaled data not available. ",
+                 "Run `scaleNotCenter(object)` first")
+        return(scaleData(ld))
+    })
+    dataClasses <- sapply(data, function(x) class(x)[1])
+    if (!all(dataClasses == dataClasses[1])) {
+        stop("Currently the scaledData of all datasets have to be of the ",
+             "same class.")
     }
-)
+    out <- runINMF.list(
+        object = data,
+        k = k,
+        lambda = lambda,
+        nIteration = nIteration,
+        nRandomStarts = nRandomStarts,
+        HInit = HInit,
+        WInit = WInit,
+        VInit = VInit,
+        seed = seed,
+        verbose = verbose,
+        barcodeList = lapply(datasets(object), colnames),
+        features = varFeatures(object)
+    )
+
+    object@W <- out$W
+    for (d in names(object)) {
+        ld <- dataset(object, d)
+        ld@H <- out$H[[d]]
+        ld@V <- out$V[[d]]
+        datasets(object, check = FALSE)[[d]] <- ld
+    }
+    object@uns$factorization <- list(k = k, lambda = lambda)
+    return(object)
+}
 
 #' @rdname runINMF
 #' @export
-setMethod(
-    "runINMF",
-    signature(object = "list"),
-    function(
+#' @param barcodeList List object of barcodes for each datasets, for setting
+#' dimnames of output \eqn{H} matrices. Default \code{NULL} uses \code{colnames}
+#' of matrices in the \code{object}.
+#' @param features Character vector of feature names, for setting dimnames of
+#' output \eqn{V} and \eqn{W} matrices. Default \code{NULL} uses \code{rownames}
+#' of matrices in the \code{object}.
+#' @return The list method returns a list of entries \code{H}, \code{V} and
+#' \code{W}. \code{H} is a list of \eqn{H} matrices for each dataset. \code{V}
+#' is a list of \eqn{V} matrices for each dataset. \code{W} is the shared
+#' \eqn{W} matrix.
+#' @method runINMF list
+runINMF.list <- function(
         object,
-        k,
+        k = 20,
         lambda = 5.0,
-        thresh = 1e-6,
-        maxIter = 30,
-        nrep = 1,
-        H.init = NULL,
-        W.init = NULL,
-        V.init = NULL,
-        method = c("planc", "liger", "rcppml"),
-        useUnshared = FALSE,
+        nIteration = 30,
+        nRandomStarts = 1,
+        HInit = NULL,
+        WInit = NULL,
+        VInit = NULL,
         seed = 1,
-        readH5 = "auto",
         verbose = getOption("ligerVerbose"),
-        # Deprecated coding style
-        max.iters = maxIter,
-        use.unshared = useUnshared,
-        rand.seed = seed,
-        # Deprecated functionality
-        print.obj = NULL
-    ) {
-        .deprecateArgs(list(max.iters = "maxIter", use.unshared = "useUnshared",
-                            rand.seed = "seed"), defunct = "print.obj")
-        # if (!all(sapply(object, is.matrix))) {
-        #     stop("All values in 'object' must be a matrix")
-        # }
-        # E ==> cell x gene scaled matrices
-        method <- match.arg(method)
-        if (method == "planc" && !requireNamespace("RcppPlanc", quietly = TRUE))
-            stop("RcppPlanc installation required")
-        if (method == "rcppml" && !requireNamespace("RcppML", quietly = TRUE))
-            stop("RcppML installation required")
-        E <- lapply(object, t)
-        nDatasets <- length(E)
-        nCells <- sapply(E, nrow)
-        tmp <- gc() # nolint
-        nGenes <- ncol(E[[1]])
-        if (k >= nGenes) {
-            stop("Select k lower than the number of variable genes: ", nGenes)
-        }
-        Wm <- matrix(0, k, nGenes)
-        Vm <- rep(list(matrix(0, k, nGenes)), nDatasets)
-        Hm <- lapply(nCells, function(n) matrix(0, n, k))
-
-        bestObj <- Inf
-        bestSeed <- seed
-        runStats <- matrix(0, nrep, 2)
-        for (i in seq(nrep)) {
-            set.seed(seed = seed + i - 1)
-            startTime <- Sys.time()
-            if (!is.null(W.init))
-                W <- t(.checkInit(W.init, nCells, nGenes, k, "W"))
-            else W <- matrix(stats::runif(nGenes * k, 0, 2), k, nGenes)
-            if (!is.null(V.init)) {
-                V <- .checkInit(V.init, nCells, nGenes, k, "V")
-                V <- lapply(V, t)
-            } else
-                V <- lapply(seq(nDatasets), function(i) {
-                    matrix(stats::runif(nGenes * k, 0, 2), k, nGenes)})
-            if (!is.null(H.init)) {
-                H <- .checkInit(H.init, nCells, nGenes, k, "H")
-                H <- lapply(H, t)
-            } else
-                H <- lapply(nCells, function(n) {
-                    matrix(stats::runif(n * k, 0, 2), n, k)
-                })
+        barcodeList = NULL,
+        features = NULL,
+        ...
+) {
+    if (!requireNamespace("RcppPlanc", quietly = TRUE))
+        stop("RcppPlanc installation required. Currently, please get the ",
+             "GitHub private repository access from the lab and run: \n",
+             "devtools::install_github(\"welch-lab/RcppPlanc\")")
 
-            delta <- 1
-            iters <- 0
-            sqrtLambda <- sqrt(lambda)
-            obj0 <- inmf_calcObj(E, H, W, V, lambda)
-            # tmp <- gc()
-            if (isTRUE(verbose)) {
-                .log("Start iNMF with seed: ", seed + i - 1, "...")
-                pb <- utils::txtProgressBar(0, maxIter, style = 3)
-            }
-            # return(list(E = E, H = H, W = W, V = V))
-            while (delta > thresh & iters < maxIter) {
-                # .log("Iter: ", iters)
-                # .log("Solving for H")
-                H <- lapply(
-                    seq(nDatasets),
-                    function(i) {
-                        t(callNNLS(
-                            C = rbind(t(W + V[[i]]), sqrtLambda*t(V[[i]])),
-                            B = expandSpZeroRow(t(E[[i]])),
-                            method = method
-                        ))
-                    }
-                )
-                # tmp <- gc()
-                # .log("Solving for V")
-                # c x k, c x g ->  g x k
-                V <- lapply(
-                    seq(nDatasets),
-                    function(i) {
-                        callNNLS(
-                            C = rbind(H[[i]], sqrtLambda*H[[i]]),
-                            B = rbind(
-                                E[[i]] - H[[i]] %*% W,
-                                matrix(0, nCells[[i]], nGenes)
-                            ),
-                            method = method
-                        )
-                    }
-                )
-                # tmp <- gc()
-                # .log("Solving for W")
-                # .log("calc B")
-                wB <- rbindlist(
-                    lapply(seq(nDatasets),
-                           function(i) E[[i]] - H[[i]] %*% V[[i]]
-                    ))
-                # .log("bppnnls")
-                W <- callNNLS(C = rbindlist(H), B = wB,
-                              method = method)
-                # tmp <- gc()
-                obj <- inmf_calcObj(E, H, W, V, lambda)
-                # tmp <- gc()
-                delta <- abs(obj0 - obj) / (mean(obj0, obj))
-                obj0 <- obj
-                iters <- iters + 1
-                if (isTRUE(verbose))
-                    utils::setTxtProgressBar(pb, value = iters)
-            }
-            if (isTRUE(verbose)) {
-                utils::setTxtProgressBar(pb, value = maxIter)
-                cat("\n")
-            }
-            # if (iters == maxIter) {
-            #   print("Warning: failed to converge within the allowed number of iterations.
-            #         Re-running with a higher maxIter is recommended.")
-            # }
-            if (obj < bestObj) {
-                Wm <- W
-                Hm <- H
-                Vm <- V
-                bestObj <- obj
-                bestSeed <- seed + i - 1
-            }
-            endTime <- difftime(time1 = Sys.time(), time2 = startTime,
-                                units = "auto", )
-            runStats[i, 1] <- as.double(endTime)
-            runStats[i, 2] <- iters
-            if (isTRUE(verbose)) {
-                .log("Finished in ", runStats[i, 1], " ", units(endTime),
-                     ", ", iters, " iterations. \nMax iterations set: ",
-                     maxIter, "\nFinal objective delta: ", delta)
-                .log("Objective: ", obj)
-                .log("Best results with seed ", bestSeed)
-            }
+    bestResult <- list()
+    bestObj <- Inf
+    bestSeed <- seed
+    for (i in seq(nRandomStarts)) {
+        if (isTRUE(verbose) && nRandomStarts > 1) {
+            .log("Replicate run ", i, "...")
         }
-        out <- list(H = Hm, V = Vm, W = t(Wm))
-        factorNames <- paste0("Factor_", seq(k))
-        for (i in seq(nDatasets)) {
-            out$H[[i]] <- t(out$H[[i]])
-            colnames(out$H[[i]]) <- colnames(object[[i]])
-            rownames(out$H[[i]]) <- factorNames
-            out$V[[i]] <- t(out$V[[i]])
-            rownames(out$V[[i]]) <- rownames(object[[i]])
-            colnames(out$V[[i]]) <- factorNames
+        set.seed(seed = seed + i - 1)
+        if (inherits(object[[1]], "H5D")) {
+            # RcppPlanc::bppinmf_h5dense()
+            stop("TODO: Push Yichen to test bppinmf_h5sparse/bppinmf_h5dense!")
+        } else {
+            out <- RcppPlanc::inmf(objectList = object, k = k, lambda = lambda,
+                                   niter = nIteration, Hinit = HInit,
+                                   Vinit = VInit, Winit = WInit,
+                                   verbose = verbose)
+        }
+        if (out$objErr < bestObj) {
+            bestResult <- out
+            bestObj <- out$objErr
+            bestSeed <- seed + i - 1
         }
-        names(out$V) <- names(out$H) <- names(object)
-        rownames(out$W) <- rownames(object[[1]])
-        colnames(out$W) <- factorNames
-        return(out)
     }
-)
-
-# Binds list of matrices row-wise (vertical stack)
-rbindlist <- function(mat_list) do.call(rbind, mat_list)
-
-expandSpZeroRow <- function(E) {
-    dimnames(E) <- list(NULL, NULL)
-    E@Dim <- c(as.integer(2*nrow(E)), as.integer(ncol(E)))
-    return(E)
-}
-
-inmf_calcObj <- function(E, H, W, V, lambda) {
-    # E - dgCMatrix
-    # H, W, V - matrix
-    obj <- 0
-    for (i in seq_along(H)) {
-        obj <- obj +
-            Matrix::norm(E[[i]] - H[[i]] %*% (W + V[[i]]), "F") ^ 2 +
-            lambda*norm(H[[i]] %*% V[[i]], "F") ^ 2
+    if (isTRUE(verbose) && nRandomStarts > 1) {
+        .log("Best objective error: ", bestObj, "\nBest seed: ", bestSeed)
     }
-    return(obj)
+    barcodeList <- lapply(object, colnames)
+    features <- rownames(object[[1]])
+    factorNames <- paste0("Factor_", seq(k))
+    for (i in seq_along(object)) {
+        bestResult$H[[i]] <- t(bestResult$H[[i]])
+        dimnames(bestResult$H[[i]]) <- list(factorNames, barcodeList[[i]])
+        dimnames(bestResult$V[[i]]) <- list(features, factorNames)
+    }
+    names(bestResult$V) <- names(bestResult$H) <- names(object)
+    dimnames(bestResult$W) <- list(features, factorNames)
+    return(bestResult)
 }
 
-callNNLS <- function(C, B, method = c("planc", "liger", "rcppml")) {
-    method <- match.arg(method)
-    switch(method,
-        planc = RcppPlanc::bppnnls(C, methods::as(B, "CsparseMatrix")),
-        liger = solveNNLS(C, as.matrix(B)),
-        rcppml = RcppML::project(w = C, data = B)
+#' @rdname runINMF
+#' @export
+#' @param datasetVar Variable name in metadata indicating a factor of dataset
+#' belonging, or directly a factor that match with the number of cells.
+#' @return The Seurat method returns a list of entries \code{H}, \code{V} and
+#' \code{W}. \code{H} is a list of \eqn{H} matrices for each dataset. \code{V}
+#' is a list of \eqn{V} matrices for each dataset. \code{W} is the shared
+#' \eqn{W} matrix.
+#' @method runINMF Seurat
+runINMF.Seurat <- function(
+        object,
+        datasetVar,
+        k,
+        lambda = 5.0,
+        nIteration = 30,
+        nRandomStarts = 1,
+        HInit = NULL,
+        WInit = NULL,
+        VInit = NULL,
+        seed = 1,
+        verbose = getOption("ligerVerbose"),
+        ...
+) {
+    if (!requireNamespace("Seurat", quietly = TRUE)) {
+        stop("Seurat installation required. Please run\n",
+             "install.packages(\"Seurat\")")
+    }
+    EBind <- Seurat::GetAssayData(object, "scale.data")
+    if (any(EBind < 0)) {
+        stop("Non-negative Matrix Factorization requires non-negative data. ",
+             "Please scale the library-size-normalized data without centering.")
+    }
+    if (is.character(datasetVar) && length(datasetVar) == 1) {
+        datasetVar <- object[[datasetVar]][[1]]
+    }
+    if (!is.factor(datasetVar) || length(datasetVar) != ncol(EBind)) {
+        stop("Invalid `datasetVar`. Please see `?runINMF` for instruction.")
+    }
+    datasetVar <- droplevels(datasetVar)
+    Es <- lapply(levels(datasetVar), function(d) {
+        as(EBind[, datasetVar == d], "CsparseMatrix")
+    })
+    names(Es) <- levels(datasetVar)
+    runINMF.list(
+        object = Es,
+        k = k,
+        lambda = lambda,
+        nIteration = nIteration,
+        nRandomStarts = nRandomStarts,
+        HInit = HInit,
+        WInit = WInit,
+        VInit = VInit,
+        seed = seed,
+        verbose = verbose
     )
 }
diff --git a/R/optimizeALS.R b/R/runINMF_R.R
similarity index 64%
rename from R/optimizeALS.R
rename to R/runINMF_R.R
index 1bb9f2c3..399a4a9b 100644
--- a/R/optimizeALS.R
+++ b/R/runINMF_R.R
@@ -39,6 +39,8 @@
 #' @param V.init Initial values to use for \eqn{V} matrices. A list object where
 #' each element is the initial \eqn{V} matrix of each dataset. Default
 #' \code{NULL}.
+#' @param method NNLS subproblem solver. Choose from \code{"liger"} (default
+#' original implementation), \code{"planc"} or \code{"rcppml"}.
 #' @param useUnshared Logical, whether to include unshared variable features and
 #' run optimizeUANLS algorithm. Defaul \code{FALSE}. Running
 #' \code{\link{selectGenes}} with \code{unshared = TRUE} and then running
@@ -57,16 +59,16 @@
 #' matrix, and the \code{H} and \code{V} slots of each
 #' \linkS4class{ligerDataset} object in the \code{datasets} slot updated with
 #' the dataset specific \eqn{H} and \eqn{V} matrix, respectively.
-#' @rdname optimizeALS
+#' @rdname runINMF_R
 #' @export
 #' @examples
 #' pbmc <- normalize(pbmc)
 #' pbmc <- selectGenes(pbmc)
 #' pbmc <- scaleNotCenter(pbmc)
 #' # Only running a few iterations for fast examples
-#' pbmc <- optimizeALS(pbmc, k = 20, maxIter = 2)
+#' pbmc <- runINMF(pbmc, k = 20, maxIter = 2)
 setGeneric(
-    "optimizeALS",
+    "runINMF_R",
     function(
         object,
         k,
@@ -77,23 +79,18 @@ setGeneric(
         H.init = NULL,
         W.init = NULL,
         V.init = NULL,
+        method = c("planc", "liger", "rcppml"),
         useUnshared = FALSE,
         seed = 1,
         readH5 = "auto",
-        verbose = getOption("ligerVerbose"),
-        # Deprecated coding style
-        max.iters = maxIter,
-        use.unshared = useUnshared,
-        rand.seed = seed,
-        # Deprecated functionality
-        print.obj = NULL
-    ) standardGeneric("optimizeALS")
+        verbose = getOption("ligerVerbose")
+    ) standardGeneric("runINMF_R")
 )
 
-#' @rdname optimizeALS
+#' @rdname runINMF_R
 #' @export
 setMethod(
-    "optimizeALS",
+    "runINMF_R",
     signature(object = "liger"),
     function(
         object,
@@ -108,16 +105,8 @@ setMethod(
         useUnshared = FALSE,
         seed = 1,
         readH5 = "auto",
-        verbose = getOption("ligerVerbose"),
-        # Deprecated coding style
-        max.iters = maxIter,
-        use.unshared = useUnshared,
-        rand.seed = seed,
-        # Deprecated functionality
-        print.obj = NULL
+        verbose = getOption("ligerVerbose")
     ) {
-        .deprecateArgs(list(max.iters = "maxIter", use.unshared = "useUnshared",
-                            rand.seed = "seed"), defunct = "print.obj")
         .checkObjVersion(object)
         object <- recordCommand(object)
         if (isFALSE(useUnshared)) {
@@ -155,10 +144,10 @@ setMethod(
                              "set to FALSE.")
                     }
                 } else {
-                    return(as.matrix(scaleData(ld)))
+                    return(scaleData(ld))
                 }
             })
-            out <- optimizeALS(
+            out <- runINMF_R(
                 object = data,
                 k = k,
                 lambda = lambda,
@@ -173,19 +162,16 @@ setMethod(
                 verbose = verbose
             )
             object@W <- out$W
-            rownames(object@W) <- varFeatures(object)
             for (d in names(object)) {
                 ld <- dataset(object, d)
                 ld@H <- out$H[[d]]
-                colnames(ld@H) <- colnames(ld)
                 ld@V <- out$V[[d]]
-                rownames(ld@V) <- varFeatures(object)
                 datasets(object, check = FALSE)[[d]] <- ld
             }
             object@uns$factorization$k <- k
             object@uns$factorization$lambda <- lambda
         } else {
-            object <- optimizeUANLS(
+            object <- runUINMF(
                 object = object,
                 k = k,
                 lambda = lambda,
@@ -200,65 +186,53 @@ setMethod(
     }
 )
 
-#' @rdname optimizeALS
+#' @rdname runINMF_R
 #' @export
 setMethod(
-    "optimizeALS",
+    "runINMF_R",
     signature(object = "list"),
     function(
         object,
         k,
         lambda = 5.0,
-        thresh = 1e-6,
         maxIter = 30,
         nrep = 1,
         H.init = NULL,
         W.init = NULL,
         V.init = NULL,
+        method = c("planc", "liger", "rcppml"),
         useUnshared = FALSE,
         seed = 1,
         readH5 = "auto",
-        verbose = getOption("ligerVerbose"),
-        # Deprecated coding style
-        max.iters = maxIter,
-        use.unshared = useUnshared,
-        rand.seed = seed,
-        # Deprecated functionality
-        print.obj = NULL
+        verbose = getOption("ligerVerbose")
     ) {
-        .deprecateArgs(list(max.iters = "maxIter", use.unshared = "useUnshared",
-                            rand.seed = "seed"), defunct = "print.obj")
-        if (!all(sapply(object, is.matrix))) {
-            stop("All values in 'object' must be a matrix")
-        }
         # E ==> cell x gene scaled matrices
-        E <- lapply(object, t)
+        E <- object
         nDatasets <- length(E)
-        nCells <- sapply(E, nrow)
-        tmp <- gc() # nolint
-        nGenes <- ncol(E[[1]])
+        nCells <- sapply(E, ncol)
+        nGenes <- nrow(E[[1]])
         if (k >= nGenes) {
             stop("Select k lower than the number of variable genes: ", nGenes)
         }
-        Wm <- matrix(0, k, nGenes)
-        Vm <- rep(list(matrix(0, k, nGenes)), nDatasets)
+        Wm <- matrix(0, nGenes, k)
+        Vm <- rep(list(matrix(0, nGenes, k)), nDatasets)
         Hm <- lapply(nCells, function(n) matrix(0, n, k))
-        tmp <- gc()
+
         bestObj <- Inf
         bestSeed <- seed
-        runStats <- matrix(0, nrep, 2)
         for (i in seq(nrep)) {
             set.seed(seed = seed + i - 1)
             startTime <- Sys.time()
             if (!is.null(W.init))
-                W <- t(.checkInit(W.init, nCells, nGenes, k, "W"))
-            else W <- matrix(stats::runif(nGenes * k, 0, 2), k, nGenes)
+                W <- .checkInit(W.init, nCells, nGenes, k, "W")
+            else W <- matrix(stats::runif(nGenes * k, 0, 2), nGenes, k)
+
             if (!is.null(V.init)) {
                 V <- .checkInit(V.init, nCells, nGenes, k, "V")
-                V <- lapply(V, t)
             } else
                 V <- lapply(seq(nDatasets), function(i) {
-                    matrix(stats::runif(nGenes * k, 0, 2), k, nGenes)})
+                    matrix(stats::runif(nGenes * k, 0, 2), nGenes, k)})
+
             if (!is.null(H.init)) {
                 H <- .checkInit(H.init, nCells, nGenes, k, "H")
                 H <- lapply(H, t)
@@ -266,71 +240,27 @@ setMethod(
                 H <- lapply(nCells, function(n) {
                     matrix(stats::runif(n * k, 0, 2), n, k)
                 })
-            tmp <- gc()
-            delta <- 1
-            iters <- 0
-            sqrtLambda <- sqrt(lambda)
 
-            obj0 <- sum(sapply(
-                seq(nDatasets),
-                function(i) norm(E[[i]] - H[[i]] %*% (W + V[[i]]), "F") ^ 2
-            )) +
-                sum(sapply(
-                    seq(nDatasets),
-                    function(i) lambda*norm(H[[i]] %*% V[[i]], "F") ^ 2
-                ))
-            tmp <- gc()
             if (isTRUE(verbose)) {
                 .log("Start iNMF with seed: ", seed + i - 1, "...")
-                pb <- utils::txtProgressBar(0, maxIter, style = 3)
+                if (maxIter > 0)
+                    pb <- utils::txtProgressBar(0, maxIter, style = 3)
             }
-            while (delta > thresh & iters < maxIter) {
-                H <- lapply(
-                    seq(nDatasets),
-                    function(i)
-                        t(solveNNLS(
-                            C = rbind(t(W + V[[i]]), sqrtLambda*t(V[[i]])),
-                            B = rbind(t(E[[i]]), matrix(0, nGenes, nCells[i]))
-                        ))
-                )
-                tmp <- gc()
-                V <- lapply(
-                    seq(nDatasets),
-                    function(i)
-                        solveNNLS(C = rbind(H[[i]], sqrtLambda*H[[i]]),
-                            B = rbind(E[[i]] - H[[i]] %*% W,
-                                      matrix(0, nCells[[i]], nGenes)))
-                )
-                tmp <- gc()
-                W <- solveNNLS(C = rbindlist(H),
-                               B = rbindlist(lapply(seq(nDatasets),
-                                   function(i) E[[i]] - H[[i]] %*% V[[i]]
-                               )))
-                tmp <- gc()
-                obj <- sum(sapply(
-                    seq(nDatasets),
-                    function(i) norm(E[[i]] - H[[i]] %*% (W + V[[i]]), "F") ^ 2
-                )) +
-                    sum(sapply(
-                        seq(nDatasets),
-                        function(i)
-                            lambda*norm(H[[i]] %*% V[[i]], "F") ^ 2
-                    ))
-                tmp <- gc()
-                delta <- abs(obj0 - obj) / (mean(obj0, obj))
-                obj0 <- obj
-                iters <- iters + 1
-                if (isTRUE(verbose))
-                    utils::setTxtProgressBar(pb, value = iters)
+            iter <- 0
+            while (iter < maxIter) {
+                H <- inmfSolveH(W = W, V = V, E = E, lambda = lambda)
+                V <- inmfSolveV(W = W, H = H, E = E, lambda = lambda)
+                W <- inmfSolveW(H = H, V = V, E = E, lambda = lambda)
+                iter <- iter + 1
+                if (isTRUE(verbose) && maxIter > 0)
+                    utils::setTxtProgressBar(pb, value = iter)
+
             }
-            if (isTRUE(verbose)) {
+            if (isTRUE(verbose) && maxIter > 0) {
                 utils::setTxtProgressBar(pb, value = maxIter)
                 cat("\n")
             }
-            # if (iters == maxIter) {
-            #   print("Warning: failed to converge within the allowed number of iterations.
-            #         Re-running with a higher maxIter is recommended.")
-            # }
+            obj <- inmf_calcObj(E, H, W, V, lambda)
             if (obj < bestObj) {
                 Wm <- W
                 Hm <- H
@@ -339,33 +269,68 @@ setMethod(
                 bestSeed <- seed + i - 1
             }
             endTime <- difftime(time1 = Sys.time(), time2 = startTime,
-                                 units = "auto")
-            runStats[i, 1] <- as.double(endTime)
-            runStats[i, 2] <- iters
+                                units = "auto")
             if (isTRUE(verbose)) {
-                .log("Finished in ", runStats[i, 1], " ", units(endTime),
-                     ", ", iters, " iterations. \nMax iterations set: ",
-                     maxIter, "\nFinal objective delta: ", delta)
+                .log("Finished in ", endTime, " ", units(endTime),
+                     "\nObjective error: ", bestObj)
                 .log("Objective: ", obj)
                 .log("Best results with seed ", bestSeed)
             }
         }
-        out <- list(H = Hm, V = Vm, W = t(Wm))
+        out <- list(H = lapply(Hm, t), V = Vm, W = Wm)
         factorNames <- paste0("Factor_", seq(k))
         for (i in seq(nDatasets)) {
-            out$H[[i]] <- t(out$H[[i]])
-            colnames(out$H[[i]]) <- colnames(object[[i]])
-            rownames(out$H[[i]]) <- factorNames
-            out$V[[i]] <- t(out$V[[i]])
-            rownames(out$V[[i]]) <- rownames(object[[i]])
-            colnames(out$V[[i]]) <- factorNames
+            dimnames(out$H[[i]]) <- list(factorNames, colnames(object[[i]]))
+            dimnames(out$V[[i]]) <- list(rownames(object[[i]]), factorNames)
         }
         names(out$V) <- names(out$H) <- names(object)
-        rownames(out$W) <- rownames(object[[1]])
-        colnames(out$W) <- factorNames
+        dimnames(out$W) <- list(rownames(object[[1]]), factorNames)
         return(out)
     }
 )
 
-# Binds list of matrices row-wise (vertical stack)
-rbindlist <- function(mat_list) do.call(rbind, mat_list)
+inmf_calcObj <- function(E, H, W, V, lambda) {
+    # E - dgCMatrix
+    # H, W, V - matrix
+    obj <- 0
+    for (i in seq_along(E)) {
+        obj <- obj +
+            Matrix::norm(E[[i]] - (W + V[[i]]) %*% t(H[[i]]), "F") ^ 2 +
+            lambda*norm(V[[i]] %*% t(H[[i]]), "F") ^ 2
+    }
+    return(obj)
+}
+
+inmfSolveH <- function(W, V, E, lambda) {
+    H <- list()
+    for (i in seq_along(E)) {
+        CtC <- t(W + V[[i]]) %*% (W + V[[i]]) + lambda*(t(V[[i]]) %*% V[[i]])
+        CtB <- as.matrix(t(W + V[[i]]) %*% E[[i]])
+        H[[i]] <- t(RcppPlanc::bppnnls_prod(CtC, CtB))
+    }
+    return(H)
+}
+
+inmfSolveV <- function(W, H, E, lambda) {
+    V <- list()
+    for (i in seq_along(E)) {
+        CtC <- (1 + lambda)*(t(H[[i]]) %*% H[[i]])
+        CtB <- as.matrix(t(H[[i]]) %*% t(E[[i]]))
+        CtB <- CtB - t(H[[i]]) %*% H[[i]] %*% t(W)
+        V[[i]] <- t(RcppPlanc::bppnnls_prod(CtC, CtB))
+    }
+    return(V)
+}
+
+inmfSolveW <- function(H, V, E, lambda) {
+    m <- nrow(E[[1]])
+    k <- ncol(H[[1]])
+    CtC <- matrix(0, k, k)
+    CtB <- matrix(0, k, m)
+    for (i in seq_along(E)) {
+        CtC <- CtC + t(H[[i]]) %*% H[[i]]
+        CtB <- CtB + as.matrix(t(H[[i]]) %*% t(E[[i]])) -
+            t(H[[i]]) %*% H[[i]] %*% t(V[[i]])
+    }
+    return(t(RcppPlanc::bppnnls_prod(CtC, CtB)))
+}
diff --git a/R/runOnlineINMF.R b/R/runOnlineINMF.R
new file mode 100644
index 00000000..f31dbc14
--- /dev/null
+++ b/R/runOnlineINMF.R
@@ -0,0 +1,262 @@
+#' Perform online iNMF on scaled datasets
+#' @description Perform online integrative non-negative matrix factorization to
+#' represent multiple single-cell datasets in terms of \eqn{H}, \eqn{W}, and
+#' \eqn{V} matrices. It optimizes the iNMF objective function using online
+#' learning (non-negative least squares for H matrix, hierarchical alternating
+#' least squares for \eqn{W} and \eqn{V} matrices), where the number of factors
+#' is set by \code{k}. The function allows online learning in 3 scenarios:
+#'
+#' \enumerate{
+#'  \item Fully observed datasets;
+#'  \item Iterative refinement using continually arriving datasets;
+#'  \item Projection of new datasets without updating the existing factorization
+#' }
+#'
+#' All three scenarios require fixed memory independent of the number of cells.
+#'
+#' For each dataset, this factorization produces an \eqn{H} matrix (k by cell),
+#' a \eqn{V} matrix (genes by k) \eqn{C^\mathsf{T}C}, and a shared \eqn{W} matrix (genes by k). The
+#' \eqn{H} matrices represent the cell factor loadings. \eqn{W} is identical
+#' among all datasets, as it represents the shared components of the metagenes
+#' across datasets. The \eqn{V} matrices represent the dataset-specific
+#' components of the metagenes.
+#' @details
+#' For optional initialization, \code{W.init} must be a matrix object with
+#' number of rows equal to number of variable genes (denoted as \code{g}) and
+#' number of columns equal to \code{k}. Any of \code{V.init}, \code{A} and
+#' \code{B} must be a list object of n matrices where n is the number of
+#' datasets in \code{object}. For \code{V.init}, each matrix should be of size g
+#' x k. For \code{A.init}, each matrix should be k x k and for \code{B.init},
+#' each matrix should be g x k.
+#'
+#' Minibatch iterations is performed on small subset of cells. The exact
+#' minibatch size applied on each dataset is \code{miniBatch_size} multiplied by
+#' the proportion of cells in this dataset out of all cells. The setting of
+#' \code{miniBatch_size} is by default \code{5000}, which is reasonable.
+#' However, a smaller value such as \code{1000} may be necessary for analyzing
+#' very small datasets. In general, \code{miniBatch_size} should be no larger
+#' than the number of cells in the smallest dataset. An epoch is one completion
+#' of calculation on all cells after a number of iterations of minibatches.
+#' Therefore, the total number of iterations is determined by the setting of
+#' \code{max.epochs}, total number of cells, and \code{miniBatch_size}.
+#'
+#' @param object \linkS4class{liger} object. Scaled data required.
+#' @param newDatasets New datasets for scenario 2 or scenario 3.
+#' @param projection Perform data integration with scenario 3. See description.
+#' Default \code{FALSE}.
+#' @param WInit Optional initialization for W. See detail. Default \code{NULL}.
+#' @param VInit Optional initialization for V. See detail. Default \code{NULL}.
+#' @param AInit Optional initialization for A. See detail. Default \code{NULL}.
+#' @param BInit Optional initialization for B. See detail. Default \code{NULL}.
+#' @param k Inner dimension of factorization--number of metagenes. A value in
+#' the range 20-50 works well for most analyses. Default \code{20}.
+#' @param lambda Regularization parameter. Larger values penalize
+#' dataset-specific effects more strongly (i.e. alignment should increase as
+#' lambda increases). We recommend always using the default value except
+#' possibly for analyses with relatively small differences (biological
+#' replicates, male/female comparisons, etc.) in which case a lower value such
+#' as 1.0 may improve reconstruction quality. Default \code{5.0}.
+#' @param maxEpochs The number of epochs to iterate through. See detail.
+#' Default \code{5}.
+#' @param HALSiters Maximum number of block coordinate descent (HALS
+#' algorithm) iterations to perform for each update of \eqn{W} and \eqn{V}.
+#' Default \code{1}. Changing this parameter is not recommended.
+#' @param miniBatchSize Total number of cells in each minibatch. See detail.
+#' Default \code{5000}.
+#' @param seed Random seed to allow reproducible results. Default \code{123}.
+#' @param verbose Logical. Whether to show information of the progress. Default
+#' \code{getOption("ligerVerbose")} which is \code{TRUE} if users have not set.
+#' @return \code{object} with \code{W} slot updated with resulting \eqn{W}
+#' matrix; the \code{H}, \code{V}, \code{A} and \code{B} slots of each
+#' \linkS4class{ligerDataset} object in \code{datasets} slot is updated with the
+#' corresponding result matrices.
+#' @export
+#' @rdname runOnlineINMF
+#' @examples
+#' pbmc <- normalize(pbmc)
+#' pbmc <- selectGenes(pbmc)
+#' pbmc <- scaleNotCenter(pbmc)
+#' # Minibatch size has to be less than number of cell in the smallest dataset
+#' # Scenario 1
+#' pbmc <- online_iNMF(pbmc, miniBatch_size = 100)
+#' # Scenario 2
+#' # Fake new dataset by increasing all non-zero value in "ctrl" by 1
+#' ctrl2 <- rawData(dataset(pbmc, "ctrl"))
+#' ctrl2@x <- ctrl2@x + 1
+#' colnames(ctrl2) <- paste0(colnames(ctrl2), 2)
+#' pbmc2 <- online_iNMF(pbmc, k = 20, X_new = list(ctrl2 = ctrl2),
+#'                      miniBatch_size = 100)
+#' # Scenario 3
+#' pbmc3 <- online_iNMF(pbmc, k = 20, X_new = list(ctrl2 = ctrl2),
+#'                      miniBatch_size = 100, projection = TRUE)
+runOnlineINMF <- function(
+        object,
+        k = 20,
+        lambda = 5,
+        newDatasets = NULL,
+        projection = FALSE,
+        maxEpochs = 5,
+        HALSiter = 1,
+        miniBatchSize = 5000,
+        seed = 123,
+        verbose = getOption("ligerVerbose"),
+        ...
+) {
+    UseMethod("runOnlineINMF", object)
+}
+
+#' @export
+#' @rdname runOnlineINMF
+#' @method runOnlineINMF liger
+runOnlineINMF.liger <- function(
+        object,
+        k = 20,
+        lambda = 5,
+        newDatasets = NULL,
+        projection = FALSE,
+        maxEpochs = 5,
+        HALSiter = 1,
+        miniBatchSize = 5000,
+        seed = 123,
+        verbose = getOption("ligerVerbose"),
+        ...
+) {
+    .checkObjVersion(object)
+    object <- recordCommand(object, dependencies = c("hdf5r", "RcppPlanc"))
+    Es <- getMatrix(object, "scaleData", returnList = TRUE)
+    Es <- lapply(datasets(object), function(ld) {
+        sd <- scaleData(ld)
+        if (is.null(sd))
+            stop("Scaled data not available. Run `scaleNotCenter()` first")
+        if (inherits(sd, "H5D")) return(.H5DToH5Mat(sd))
+        else if (inherits(sd, "H5Group"))
+            return(.H5GroupToH5SpMat(sd, c(length(varFeatures(object)), ncol(ld))))
+        else return(scaleData(ld))
+    })
+    WInit <- VInit <- AInit <- BInit <- NULL
+    if (!is.null(newDatasets)) {
+        WInit <- getMatrix(object, "W")
+        VInit <- getMatrix(object, "V")
+        AInit <- getMatrix(object, "A")
+        BInit <- getMatrix(object, "B")
+        if (is.null(WInit) || any(sapply(VInit, is.null)) ||
+            any(sapply(AInit, is.null)) || any(sapply(BInit, is.null))) {
+            stop("Cannot find complete online iNMF result for current ",
+                 "datasets. Please run `runOnlineINMF()` without `newDataset` ",
+                 "first.")
+        }
+        # Put new datasets into input liger object
+        if (is.list(newDatasets)) {
+            allType <- sapply(newDatasets, function(x) class(x)[1])
+            if (!all(allType == "dgCMatrix")) {
+                stop("`newDatasets` must be a list of dgCMatrix for now.")
+            }
+            if (is.null(names(newDatasets))) {
+                stop("`newDatasets` must be a named list.")
+            }
+            allNewNames <- character()
+            for (i in seq_along(newDatasets)) {
+                if (names(newDatasets)[i] %in% names(object)) {
+                    newName <- paste0(names(newDatasets)[i], ".1")
+                } else newName <- names(newDatasets)[i]
+                dataset(object, newName) <- newDatasets[[i]]
+                allNewNames <- c(allNewNames, newName)
+            }
+            object <- normalize(object, useDatasets = allNewNames,
+                                verbose = verbose)
+            object <- scaleNotCenter(object, useDatasets = allNewNames,
+                                     verbose = verbose)
+            newDatasets <- getMatrix(object, slot = "scaleData",
+                                     dataset = allNewNames, returnList = TRUE)
+        }
+    }
+    object <- closeAllH5(object)
+    res <- runOnlineINMF.list(Es, newDatasets = newDatasets,
+                              projection = projection, k = k, lambda = lambda,
+                              maxEpochs = maxEpochs,
+                              miniBatchSize = miniBatchSize,
+                              HALSiter = HALSiter, verbose = verbose,
+                              WInit = WInit, VInit = VInit, AInit = AInit,
+                              BInit = BInit, seed = seed)
+
+    for (i in seq_along(object)) {
+        ld <- dataset(object, i)
+        ld@H <- res$H[[i]]
+        ld@V <- res$V[[i]]
+        ld@A <- res$A[[i]]
+        ld@B <- res$B[[i]]
+        datasets(object, check = FALSE)[[i]] <- ld
+    }
+    object@W <- res$W
+    object@uns$factorization <- list(k = k, lambda = lambda)
+    suppressMessages({object <- restoreH5Liger(object)})
+    return(object)
+}
+
+#' @export
+#' @rdname runOnlineINMF
+#' @method runOnlineINMF list
+runOnlineINMF.list <- function(
+        object,
+        k = 20,
+        lambda = 5,
+        newDatasets = NULL,
+        projection = FALSE,
+        maxEpochs = 5,
+        WInit = NULL,
+        VInit = NULL,
+        AInit = NULL,
+        BInit = NULL,
+        HALSiter = 1,
+        miniBatchSize = 5000,
+        seed = 123,
+        verbose = getOption("ligerVerbose"),
+        ...
+) {
+    if (!requireNamespace("RcppPlanc", quietly = TRUE))
+        stop("RcppPlanc installation required. Currently, please get the ",
+             "GitHub private repository access from the lab and run: \n",
+             "devtools::install_github(\"welch-lab/RcppPlanc\")")
+    nDatasets <- length(object) + length(newDatasets)
+    barcodeList <- c(lapply(object, colnames), lapply(newDatasets, colnames))
+    features <- rownames(object[[1]])
+    if (!is.null(seed)) set.seed(seed)
+    res <- RcppPlanc::onlineINMF(objectList = object, newDatasets = newDatasets,
+                                 project = projection, k = k, lambda = lambda,
+                                 maxEpoch = maxEpochs,
+                                 minibatchSize = miniBatchSize,
+                                 maxHALSIter = HALSiter, Vinit = VInit,
+                                 Winit = WInit, Ainit = AInit, Binit = BInit,
+                                 verbose = verbose)
+    factorNames <- paste0("Factor_", seq(k))
+    for (i in seq(nDatasets)) {
+        res$H[[i]] <- t(res$H[[i]])
+        dimnames(res$H[[i]]) <- list(factorNames, barcodeList[[i]])
+        dimnames(res$V[[i]]) <- list(features, factorNames)
+        dimnames(res$A[[i]]) <- list(factorNames, factorNames)
+        dimnames(res$B[[i]]) <- list(features, factorNames)
+    }
+    names(res$A) <- names(res$B) <- names(res$V) <-
+        names(res$H) <- c(names(object), names(newDatasets))
+    dimnames(res$W) <- list(features, factorNames)
+    return(res)
+}
+
+#' @export
+#' @rdname runOnlineINMF
+#' @method runOnlineINMF Seurat
+runOnlineINMF.Seurat <- function(
+        object,
+        k = 20,
+        lambda = 5,
+        newDatasets = NULL,
+        projection = FALSE,
+        maxEpochs = 5,
+        HALSiter = 1,
+        miniBatchSize = 5000,
+        seed = 123,
+        verbose = getOption("ligerVerbose"),
+        ...
+) {
+
+}
diff --git a/R/runUINMF.R b/R/runUINMF.R
index 2c111ccf..9037cb9c 100644
--- a/R/runUINMF.R
+++ b/R/runUINMF.R
@@ -31,6 +31,7 @@ runUINMF <- function(
 
 #' @export
 #' @rdname runUINMF
+#' @method runUINMF liger
 runUINMF.liger <- function(
         object,
         k = 20,
@@ -41,25 +42,40 @@ runUINMF.liger <- function(
         verbose = getOption("ligerVerbose"),
         ...
 ) {
+    .checkObjVersion(object)
     object <- recordCommand(object, dependencies = "RcppPlanc")
-    Elist <- getMatrix(object, "scaleData", returnList = TRUE)
+    object <- removeMissing(object, orient = "cell", verbose = verbose)
+    # Elist <- getMatrix(object, "scaleData", returnList = TRUE)
+
+    Elist <- lapply(datasets(object), function(ld) {
+        if (is.null(scaleData(ld)))
+            stop("Scaled data not available. ",
+                 "Run `scaleNotCenter(object)` first")
+        return(scaleData(ld))
+    })
     Ulist <- getMatrix(object, "scaleUnsharedData", returnList = TRUE)
+    if (all(sapply(Ulist, is.null))) {
+        stop("No scaled data for unshared feature found. Run `selectGenes()` ",
+             "with `unshared = TRUE` and then `scaleNotCenter()`.")
+    }
     res <- runUINMF.list(Elist, Ulist, k = k, lambda = lambda,
                          nIteration = nIteration, nRandomStarts = nRandomStarts,
                          seed = seed, verbose = verbose, ...)
     for (i in seq_along(object)) {
         ld <- dataset(object, i)
-        ld@H <- t(res$H[[i]])
+        ld@H <- res$H[[i]]
         ld@V <- res$V[[i]]
         if (!is.null(ld@scaleUnsharedData)) ld@U <- res$U[[i]]
         datasets(object, check = FALSE)[[i]] <- ld
     }
     object@W <- res$W
+    object@uns$factorization <- list(k = k, lambda = lambda)
     return(object)
 }
 
 #' @export
 #' @rdname runUINMF
+#' @method runUINMF list
 #' @param unsharedList List of matrices for unshared features
 runUINMF.list <- function(
         object,
@@ -72,27 +88,9 @@ runUINMF.list <- function(
         verbose = getOption("ligerVerbose"),
         ...
 ) {
-    nGene <- sapply(object, nrow)
-    if (!all(nGene == nGene[1])) {
-        stop("Number of rows must be the same in all matrices in `object`")
-    }
-    for (i in seq_along(object)) {
-        # Force sparse, and create 0xN matrix for dataset without unshared
-        object[[i]] <- as(object[[i]], "CsparseMatrix")
-        if (!is.null(unsharedList[[i]])) {
-            if (ncol(unsharedList[[i]]) != ncol(object[[i]])) {
-                stop("Number of columns in each matrix from `unsharedList` ",
-                     "must match with the corresponding matrix from `object`")
-            }
-            unsharedList[[i]] <- as(unsharedList[[i]], "CsparseMatrix")
-        } else {
-            unsharedList[[i]] <- as(as(as(Matrix::Matrix(
-                nrow = 0, ncol = ncol(object[[i]])
-            ), "dMatrix"), "generalMatrix"), "CsparseMatrix")
-        }
-    }
     bestObj <- Inf
     bestRes <- NULL
+    bestSeed <- NULL
     for (i in seq(nRandomStarts)) {
         seed <- seed + i - 1
         set.seed(seed)
@@ -101,28 +99,32 @@ runUINMF.list <- function(
         if (res$objErr < bestObj) {
             bestRes <- res
             bestObj <- res$objErr
+            bestSeed <- seed
         }
     }
+    if (isTRUE(verbose) && nRandomStarts > 1) {
+        .log("Best objective error: ", bestObj, "\nBest seed: ", bestSeed)
+    }
     rm(res)
-    genes <- rownames(object[[1]])
+    features <- rownames(object[[1]])
     unsharedFeatures <- lapply(unsharedList, rownames)
     factorNames <- paste0("Factor_", seq(k))
     barcodes <- lapply(object, colnames)
     for (i in seq_along(object)) {
-        rownames(bestRes$H[[i]]) <- barcodes[[i]]
-        colnames(bestRes$H[[i]]) <- factorNames
-        rownames(bestRes$V[[i]]) <- genes
-        colnames(bestRes$V[[i]]) <- factorNames
+        bestRes$H[[i]] <- t(bestRes$H[[i]])
+        dimnames(bestRes$H[[i]]) <- list(factorNames, barcodes[[i]])
+        dimnames(bestRes$V[[i]]) <- list(features, factorNames)
+        dimnames(bestRes$U[[i]]) <- list(unsharedFeatures[[i]], factorNames)
         rownames(bestRes$U[[i]]) <- unsharedFeatures[[i]]
         colnames(bestRes$U[[i]]) <- factorNames
     }
-    rownames(bestRes$W) <- genes
-    colnames(bestRes$W) <- factorNames
+    dimnames(bestRes$W) <- list(features, factorNames)
     return(bestRes)
 }
 
 #' @export
 #' @rdname runUINMF
+#' @method runUINMF Seurat
 #' @param datasetVar Variable for dataset belonging.
 runUINMF.Seurat <- function(
         object,
diff --git a/R/scaleNotCenter.R b/R/scaleNotCenter.R
index 99cc1a6d..a0e592ba 100644
--- a/R/scaleNotCenter.R
+++ b/R/scaleNotCenter.R
@@ -137,6 +137,7 @@ scaleNotCenter <- function(
                  dims = length(features), dtype = "int")
     h5file[[paste0(resultH5Path, "/featureIdx")]][1:length(featureIdx)] <-
         featureIdx
+    h5fileInfo(ld, "scaleData", check = FALSE) <- resultH5Path
     return(ld)
 }
 
@@ -169,7 +170,7 @@ scaleNotCenter <- function(
                                    cellIdx] <- chunk
         }
     )
-    h5fileInfo(ld, resultH5Path, check = FALSE) <- resultH5Path
+    h5fileInfo(ld, "scaleData", check = FALSE) <- resultH5Path
     safeH5Create(
         ld,
         dataPath = paste0(resultH5Path, ".featureIdx"),
@@ -188,9 +189,8 @@ scaleNotCenter <- function(
                       dimnames = list(NULL, colnames(norm.subset))))
     scaled <- t(scaleNotCenterFast(t(norm.subset)))
     # scaled: g x c
-    # scaled <- as.matrix(t(scaled))
-    scaled[is.na(scaled)] <- 0
-    scaled[scaled == Inf] = 0
+    scaled@x[is.na(scaled@x)] <- 0
+    scaled@x[scaled@x == Inf] <- 0
     rownames(scaled) <- rownames(norm.subset)
     colnames(scaled) <- colnames(norm.subset)
     return(scaled)
diff --git a/man/closeAllH5.Rd b/man/closeAllH5.Rd
new file mode 100644
index 00000000..5be7bf7d
--- /dev/null
+++ b/man/closeAllH5.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/h5Utility.R
+\name{closeAllH5}
+\alias{closeAllH5}
+\title{Close all links (to HDF5 files) of a liger object}
+\usage{
+closeAllH5(object)
+}
+\arguments{
+\item{object}{liger object.}
+}
+\value{
+\code{object} with links closed.
+}
+\description{
+When need to interact with the data embedded in HDF5 files out
+of the currect R session, the HDF5 files has to be closed in order to be
+available to other processes.
+}
diff --git a/man/liger-class.Rd b/man/liger-class.Rd
index 00c81d82..ca2d3e46 100644
--- a/man/liger-class.Rd
+++ b/man/liger-class.Rd
@@ -36,7 +36,7 @@
 \alias{cellMeta<-,liger,missing-method}
 \alias{cellMeta<-,liger,character-method}
 \alias{[[,liger,ANY,missing-method}
-\alias{[[<-,liger,ANY,missing,ANY-method}
+\alias{[[<-,liger,ANY,missing-method}
 \alias{$,liger-method}
 \alias{$<-,liger-method}
 \alias{varFeatures}
@@ -81,7 +81,7 @@ datasets(x, check = TRUE) <- value
 
 dataset(x, dataset = NULL)
 
-dataset(x, dataset, type = NULL, qc = TRUE) <- value
+dataset(x, dataset, type = NULL, qc = FALSE) <- value
 
 \S4method{dataset}{liger,character_OR_NULL}(x, dataset = NULL)
 
@@ -89,9 +89,9 @@ dataset(x, dataset, type = NULL, qc = TRUE) <- value
 
 \S4method{dataset}{liger,numeric}(x, dataset = NULL)
 
-\S4method{dataset}{liger,character,missing,ANY,ligerDataset}(x, dataset, type = NULL, qc = TRUE) <- value
+\S4method{dataset}{liger,character,missing,ANY,ligerDataset}(x, dataset, type = NULL, qc = FALSE) <- value
 
-\S4method{dataset}{liger,character,ANY,ANY,matrixLike}(x, dataset, type = c("rawData", "normData", "scaleData"), qc = TRUE) <- value
+\S4method{dataset}{liger,character,ANY,ANY,matrixLike}(x, dataset, type = c("rawData", "normData", "scaleData"), qc = FALSE) <- value
 
 \S4method{dataset}{liger,character,missing,ANY,`NULL`}(x, dataset, type = NULL, qc = TRUE) <- value
 
@@ -117,7 +117,7 @@ cellMeta(x, columns = NULL, check = FALSE) <- value
 
 \S4method{[[}{liger,ANY,missing}(x, i, j, ...)
 
-\S4method{[[}{liger,ANY,missing,ANY}(x, i, j, ...) <- value
+\S4method{[[}{liger,ANY,missing}(x, i, j, ...) <- value
 
 \S4method{$}{liger}(x, name)
 
diff --git a/man/ligerDataset-class.Rd b/man/ligerDataset-class.Rd
index deecb4d9..1d19d0b3 100644
--- a/man/ligerDataset-class.Rd
+++ b/man/ligerDataset-class.Rd
@@ -26,11 +26,13 @@
 \alias{scaleData,ligerDataset,missing-method}
 \alias{scaleData<-,ligerDataset,ANY,matrixLike_OR_NULL-method}
 \alias{scaleData<-,ligerDataset,ANY,H5D-method}
+\alias{scaleData<-,ligerDataset,ANY,H5Group-method}
 \alias{scaleUnsharedData}
 \alias{scaleUnsharedData<-}
 \alias{scaleUnsharedData,ligerDataset,missing-method}
 \alias{scaleUnsharedData<-,ligerDataset,ANY,matrixLike_OR_NULL-method}
 \alias{scaleUnsharedData<-,ligerDataset,ANY,H5D-method}
+\alias{scaleUnsharedData<-,ligerDataset,ANY,H5Group-method}
 \alias{getMatrix}
 \alias{getMatrix,ligerDataset,ANY,missing,missing-method}
 \alias{h5fileInfo}
@@ -90,6 +92,8 @@ scaleData(x, check = TRUE) <- value
 
 \S4method{scaleData}{ligerDataset,ANY,H5D}(x, check = TRUE) <- value
 
+\S4method{scaleData}{ligerDataset,ANY,H5Group}(x, check = TRUE) <- value
+
 scaleUnsharedData(x, dataset = NULL)
 
 scaleUnsharedData(x, check = TRUE) <- value
@@ -100,6 +104,8 @@ scaleUnsharedData(x, check = TRUE) <- value
 
 \S4method{scaleUnsharedData}{ligerDataset,ANY,H5D}(x, check = TRUE) <- value
 
+\S4method{scaleUnsharedData}{ligerDataset,ANY,H5Group}(x, check = TRUE) <- value
+
 getMatrix(x, slot = "rawData", dataset = NULL, returnList = FALSE)
 
 \S4method{getMatrix}{ligerDataset,ANY,missing,missing}(
diff --git a/man/runBPPINMF.Rd b/man/runBPPINMF.Rd
deleted file mode 100644
index e2a7c37e..00000000
--- a/man/runBPPINMF.Rd
+++ /dev/null
@@ -1,172 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/runBPPINMF.R
-\name{runBPPINMF}
-\alias{runBPPINMF}
-\alias{runBPPINMF.liger}
-\alias{runBPPINMF.list}
-\alias{runBPPINMF.Seurat}
-\title{Perform iNMF on scaled datasets}
-\usage{
-runBPPINMF(
-  object,
-  k,
-  lambda = 5,
-  nIteration = 30,
-  nRandomStarts = 1,
-  HInit = NULL,
-  WInit = NULL,
-  VInit = NULL,
-  seed = 1,
-  verbose = getOption("ligerVerbose"),
-  ...
-)
-
-\method{runBPPINMF}{liger}(
-  object,
-  k,
-  lambda = 5,
-  nIteration = 30,
-  nRandomStarts = 1,
-  HInit = NULL,
-  WInit = NULL,
-  VInit = NULL,
-  seed = 1,
-  verbose = getOption("ligerVerbose"),
-  readH5 = "auto",
-  ...
-)
-
-\method{runBPPINMF}{list}(
-  object,
-  k,
-  lambda = 5,
-  nIteration = 30,
-  nRandomStarts = 1,
-  HInit = NULL,
-  WInit = NULL,
-  VInit = NULL,
-  seed = 1,
-  verbose = getOption("ligerVerbose"),
-  barcodeList = NULL,
-  features = NULL,
-  ...
-)
-
-\method{runBPPINMF}{Seurat}(
-  object,
-  datasetVar,
-  k,
-  lambda = 5,
-  nIteration = 30,
-  nRandomStarts = 1,
-  HInit = NULL,
-  WInit = NULL,
-  VInit = NULL,
-  seed = 1,
-  verbose = getOption("ligerVerbose"),
-  ...
-)
-}
-\arguments{
-\item{object}{A \linkS4class{liger} object, a Seurat object or a named list
-of matrix, dgCMatrix, H5D objects, where the names represents dataset names
-and matrices are scaled on the same set of variable features, with rows as
-features and columns as cells.}
-
-\item{k}{Inner dimension of factorization (number of factors). Run
-\code{\link{suggestK}} to determine appropriate value; a general rule of
-thumb is that a higher \code{k} will be needed for datasets with more
-sub-structure.}
-
-\item{lambda}{Regularization parameter. Larger values penalize
-dataset-specific effects more strongly (i.e. alignment should increase as
-\code{lambda} increases). Default \code{5}.}
-
-\item{nIteration}{Total number of block coordinate descent iterations to
-perform. Default \code{30}.}
-
-\item{nRandomStarts}{Number of restarts to perform (iNMF objective function
-is non-convex, so taking the best objective from multiple successive
-initialization is recommended). For easier reproducibility, this increments
-the random seed by 1 for each consecutive restart, so future factorization
-of the same dataset can be run with one rep if necessary. Default \code{1}.}
-
-\item{HInit}{Initial values to use for \eqn{H} matrices. A list object where
-each element is the initial \eqn{H} matrix of each dataset. Default
-\code{NULL}.}
-
-\item{WInit}{Initial values to use for \eqn{W} matrix. A matrix object.
-Default \code{NULL}.}
-
-\item{VInit}{Initial values to use for \eqn{V} matrices. A list object where
-each element is the initial \eqn{V} matrix of each dataset. Default
-\code{NULL}.}
-
-\item{seed}{Random seed to allow reproducible results. Default \code{1}.}
-
-\item{verbose}{Logical. Whether to show information of the progress. Default
-\code{getOption("ligerVerbose")} which is \code{TRUE} if users have not set.}
-
-\item{...}{Arguments passed to methods.}
-
-\item{readH5}{\code{TRUE} to force reading H5 based data into memory and
-conduct factorization. \code{"auto"} reads H5 dataset with less than 8000
-cells. \code{FALSE} will stop users from running if H5 data presents.}
-
-\item{barcodeList}{List object of barcodes for each datasets, for setting
-dimnames of output \eqn{H} matrices. Default \code{NULL} uses \code{colnames}
-of matrices in the \code{object}.}
-
-\item{features}{Character vector of feature names, for setting dimnames of
-output \eqn{V} and \eqn{W} matrices. Default \code{NULL} uses \code{rownames}
-of matrices in the \code{object}.}
-
-\item{datasetVar}{Variable name in metadata indicating a factor of dataset
-belonging, or directly a factor that match with the number of cells.}
-}
-\value{
-The liger method returns the input \linkS4class{liger} object with
-factorization result updated. A list of all \eqn{H} matrices can be accessed
-with \code{getMatrix(object, "H")}, a list of all \eqn{V} matrices can be
-accessed with \code{getMatrix(object, "V")}, and the \eqn{W} matrix can be
-accessed with \code{getMatrix(object, "W")}.
-
-The list method returns a list of entries \code{H}, \code{V} and
-\code{W}. \code{H} is a list of \eqn{H} matrices for each dataset. \code{V}
-is a list of \eqn{V} matrices for each dataset. \code{W} is the shared
-\eqn{W} matrix.
-
-The Seurat method returns a list of entries \code{H}, \code{V} and
-\code{W}. \code{H} is a list of \eqn{H} matrices for each dataset. \code{V}
-is a list of \eqn{V} matrices for each dataset. \code{W} is the shared
-\eqn{W} matrix.
-}
-\description{
-Performs integrative non-negative matrix factorization (iNMF) (J.D. Welch,
-2019) to return factorized \eqn{H}, \eqn{W}, and \eqn{V} matrices. The
-objective function is stated as
-
-\deqn{\arg\min_{H\ge0,W\ge0,V\ge0}\sum_{i}^{d}||E_i-(W+V_i)Hi||^2_F+\lambda\sum_{i}^{d}||V_iH_i||_F^2}
-
-where \eqn{E_i} is the input non-negative matrix of the i'th dataset, \eqn{d}
-is the total number of datasets.
-
-The factorization produces a shared \eqn{W} matrix (genes by k), and for each
-dataset, an \eqn{H} matrix (k by cells) and a \eqn{V} matrix (genes by k).
-The \eqn{H} matrices represent the cell factor loadings. \eqn{W} is held
-consistent among all datasets, as it represents the shared components of the
-metagenes across datasets. The \eqn{V} matrices represent the
-dataset-specific components of the metagenes.
-
-This function adopts highly optimized fast and memory efficient
-implementation extended from Planc (Kannan, 2016). Pre-installation of
-extension package \code{RcppPlanc} is required. The underlying algorithm
-adopts the identical ANLS strategy as \code{\link{optimizeALS}} in the old
-version of LIGER.
-}
-\examples{
-pbmc <- normalize(pbmc)
-pbmc <- selectGenes(pbmc)
-pbmc <- scaleNotCenter(pbmc)
-pbmc <- runBPPINMF(pbmc, k = 20)
-}
diff --git a/man/runINMF.Rd b/man/runINMF.Rd
index 8646d4ec..acc84bb8 100644
--- a/man/runINMF.Rd
+++ b/man/runINMF.Rd
@@ -2,157 +2,174 @@
 % Please edit documentation in R/runINMF.R
 \name{runINMF}
 \alias{runINMF}
-\alias{runINMF,liger-method}
-\alias{runINMF,list-method}
+\alias{runINMF.liger}
+\alias{runINMF.list}
+\alias{runINMF.Seurat}
 \title{Perform iNMF on scaled datasets}
 \usage{
 runINMF(
   object,
-  k,
+  k = 20,
   lambda = 5,
-  thresh = 1e-06,
-  maxIter = 30,
-  nrep = 1,
-  H.init = NULL,
-  W.init = NULL,
-  V.init = NULL,
-  method = c("planc", "liger", "rcppml"),
-  useUnshared = FALSE,
+  nIteration = 30,
+  nRandomStarts = 1,
+  HInit = NULL,
+  WInit = NULL,
+  VInit = NULL,
   seed = 1,
-  readH5 = "auto",
   verbose = getOption("ligerVerbose"),
-  max.iters = maxIter,
-  use.unshared = useUnshared,
-  rand.seed = seed,
-  print.obj = NULL
+  ...
 )
 
-\S4method{runINMF}{liger}(
+\method{runINMF}{liger}(
   object,
-  k,
+  k = 20,
   lambda = 5,
-  thresh = 1e-06,
-  maxIter = 30,
-  nrep = 1,
-  H.init = NULL,
-  W.init = NULL,
-  V.init = NULL,
-  method = c("planc", "liger", "rcppml"),
-  useUnshared = FALSE,
+  nIteration = 30,
+  nRandomStarts = 1,
+  HInit = NULL,
+  WInit = NULL,
+  VInit = NULL,
   seed = 1,
+  verbose = getOption("ligerVerbose"),
   readH5 = "auto",
+  ...
+)
+
+\method{runINMF}{list}(
+  object,
+  k = 20,
+  lambda = 5,
+  nIteration = 30,
+  nRandomStarts = 1,
+  HInit = NULL,
+  WInit = NULL,
+  VInit = NULL,
+  seed = 1,
   verbose = getOption("ligerVerbose"),
-  max.iters = maxIter,
-  use.unshared = useUnshared,
-  rand.seed = seed,
-  print.obj = NULL
+  barcodeList = NULL,
+  features = NULL,
+  ...
 )
 
-\S4method{runINMF}{list}(
+\method{runINMF}{Seurat}(
   object,
+  datasetVar,
   k,
   lambda = 5,
-  thresh = 1e-06,
-  maxIter = 30,
-  nrep = 1,
-  H.init = NULL,
-  W.init = NULL,
-  V.init = NULL,
-  method = c("planc", "liger", "rcppml"),
-  useUnshared = FALSE,
+  nIteration = 30,
+  nRandomStarts = 1,
+  HInit = NULL,
+  WInit = NULL,
+  VInit = NULL,
   seed = 1,
-  readH5 = "auto",
   verbose = getOption("ligerVerbose"),
-  max.iters = maxIter,
-  use.unshared = useUnshared,
-  rand.seed = seed,
-  print.obj = NULL
+  ...
 )
 }
 \arguments{
-\item{object}{A \linkS4class{liger} object or a named list of matrix object,
-where the names represents dataset names and matrices are scaled on the same
-set of variable features, with rows as features and columns as cells.}
+\item{object}{A \linkS4class{liger} object, a Seurat object or a named list
+of matrix, dgCMatrix, H5D objects, where the names represents dataset names
+and matrices are scaled on the same set of variable features, with rows as
+features and columns as cells.}
 
 \item{k}{Inner dimension of factorization (number of factors). Run
 \code{\link{suggestK}} to determine appropriate value; a general rule of
 thumb is that a higher \code{k} will be needed for datasets with more
-sub-structure.}
+sub-structure. Default \code{20}.}
 
 \item{lambda}{Regularization parameter. Larger values penalize
 dataset-specific effects more strongly (i.e. alignment should increase as
 \code{lambda} increases). Default \code{5}.}
 
-\item{thresh}{Convergence threshold. Convergence occurs when
-\eqn{|obj_0-obj|/(mean(obj_0,obj)) < thresh}. Default \code{1e-6}.}
-
-\item{maxIter}{Maximum number of block coordinate descent iterations to
+\item{nIteration}{Total number of block coordinate descent iterations to
 perform. Default \code{30}.}
 
-\item{nrep}{Number of restarts to perform (iNMF objective function is
-non-convex, so taking the best objective from multiple successive
+\item{nRandomStarts}{Number of restarts to perform (iNMF objective function
+is non-convex, so taking the best objective from multiple successive
 initialization is recommended). For easier reproducibility, this increments
 the random seed by 1 for each consecutive restart, so future factorization
 of the same dataset can be run with one rep if necessary. Default \code{1}.}
 
-\item{H.init}{Initial values to use for \eqn{H} matrices. A list object where
+\item{HInit}{Initial values to use for \eqn{H} matrices. A list object where
 each element is the initial \eqn{H} matrix of each dataset. Default
 \code{NULL}.}
 
-\item{W.init}{Initial values to use for \eqn{W} matrix. A matrix object.
+\item{WInit}{Initial values to use for \eqn{W} matrix. A matrix object.
 Default \code{NULL}.}
 
-\item{V.init}{Initial values to use for \eqn{V} matrices. A list object where
+\item{VInit}{Initial values to use for \eqn{V} matrices. A list object where
 each element is the initial \eqn{V} matrix of each dataset. Default
 \code{NULL}.}
 
-\item{method}{NNLS subproblem solver. Choose from \code{"liger"} (default
-original implementation), \code{"planc"} or \code{"rcppml"}.}
+\item{seed}{Random seed to allow reproducible results. Default \code{1}.}
 
-\item{useUnshared}{Logical, whether to include unshared variable features and
-run optimizeUANLS algorithm. Defaul \code{FALSE}. Running
-\code{\link{selectGenes}} with \code{unshared = TRUE} and then running
-\code{\link{scaleNotCenter}} is required.}
+\item{verbose}{Logical. Whether to show information of the progress. Default
+\code{getOption("ligerVerbose")} which is \code{TRUE} if users have not set.}
 
-\item{seed}{Random seed to allow reproducible results. Default \code{1}.}
+\item{...}{Arguments passed to methods.}
 
 \item{readH5}{\code{TRUE} to force reading H5 based data into memory and
 conduct factorization. \code{"auto"} reads H5 dataset with less than 8000
 cells. \code{FALSE} will stop users from running if H5 data presents.}
 
-\item{verbose}{Logical. Whether to show information of the progress. Default
-\code{getOption("ligerVerbose")} which is \code{TRUE} if users have not set.}
+\item{barcodeList}{List object of barcodes for each datasets, for setting
+dimnames of output \eqn{H} matrices. Default \code{NULL} uses \code{colnames}
+of matrices in the \code{object}.}
 
-\item{max.iters, use.unshared, rand.seed}{\bold{Deprecated}. See Usage section
-for replacement.}
+\item{features}{Character vector of feature names, for setting dimnames of
+output \eqn{V} and \eqn{W} matrices. Default \code{NULL} uses \code{rownames}
+of matrices in the \code{object}.}
 
-\item{print.obj}{\bold{Defunct}. Whether to print objective function values
-after convergence when \code{verbose = TRUE}. Now always print when verbose.}
+\item{datasetVar}{Variable name in metadata indicating a factor of dataset
+belonging, or directly a factor that match with the number of cells.}
 }
 \value{
-\code{object} with \code{W} slot updated with the result \eqn{W}
-matrix, and the \code{H} and \code{V} slots of each
-\linkS4class{ligerDataset} object in the \code{datasets} slot updated with
-the dataset specific \eqn{H} and \eqn{V} matrix, respectively.
+The liger method returns the input \linkS4class{liger} object with
+factorization result updated. A list of all \eqn{H} matrices can be accessed
+with \code{getMatrix(object, "H")}, a list of all \eqn{V} matrices can be
+accessed with \code{getMatrix(object, "V")}, and the \eqn{W} matrix can be
+accessed with \code{getMatrix(object, "W")}.
+
+The list method returns a list of entries \code{H}, \code{V} and
+\code{W}. \code{H} is a list of \eqn{H} matrices for each dataset. \code{V}
+is a list of \eqn{V} matrices for each dataset. \code{W} is the shared
+\eqn{W} matrix.
+
+The Seurat method returns a list of entries \code{H}, \code{V} and
+\code{W}. \code{H} is a list of \eqn{H} matrices for each dataset. \code{V}
+is a list of \eqn{V} matrices for each dataset. \code{W} is the shared
+\eqn{W} matrix.
 }
 \description{
-Performs integrative non-negative matrix (iNMF) factorization to return
-factorized \eqn{H}, \eqn{W}, and \eqn{V} matrices. It optimizes the iNMF
-objective function using block coordinate descent (alternating non-negative
-least squares), where the number of factors is set by \code{k}. TODO: include
-objective function equation here in documentation (using deqn)
-
-For each dataset, this factorization produces an \eqn{H} matrix (cells by k),
-a \eqn{V} matrix (k by genes), and a shared \eqn{W} matrix (k by genes). The
-\eqn{H} matrices represent the cell factor loadings. \eqn{W} is held
+Performs integrative non-negative matrix factorization (iNMF) (J.D. Welch,
+2019) to return factorized \eqn{H}, \eqn{W}, and \eqn{V} matrices. The
+objective function is stated as
+
+\deqn{\arg\min_{H\ge0,W\ge0,V\ge0}\sum_{i}^{d}||E_i-(W+V_i)Hi||^2_F+\lambda\sum_{i}^{d}||V_iH_i||_F^2}
+
+where \eqn{E_i} is the input non-negative matrix of the i'th dataset, \eqn{d}
+is the total number of datasets. \eqn{E_i} is of size \eqn{m \times n_i} for
+\eqn{m} variable genes and \eqn{n_i} cells, \eqn{H_i} is of size
+\eqn{n_i \times k}, \eqn{V_i} is of size \eqn{m \times k}, and \eqn{W} is of
+size \eqn{m \times k}.
+
+The factorization produces a shared \eqn{W} matrix (genes by k), and for each
+dataset, an \eqn{H} matrix (k by cells) and a \eqn{V} matrix (genes by k).
+The \eqn{H} matrices represent the cell factor loadings. \eqn{W} is held
 consistent among all datasets, as it represents the shared components of the
 metagenes across datasets. The \eqn{V} matrices represent the
 dataset-specific components of the metagenes.
+
+This function adopts highly optimized fast and memory efficient
+implementation extended from Planc (Kannan, 2016). Pre-installation of
+extension package \code{RcppPlanc} is required. The underlying algorithm
+adopts the identical ANLS strategy as \code{\link{optimizeALS}} in the old
+version of LIGER.
 }
 \examples{
 pbmc <- normalize(pbmc)
 pbmc <- selectGenes(pbmc)
 pbmc <- scaleNotCenter(pbmc)
-# Only running a few iterations for fast examples
-pbmc <- runINMF(pbmc, k = 20, maxIter = 2)
+pbmc <- runINMF(pbmc, k = 20)
 }
diff --git a/man/optimizeALS.Rd b/man/runINMF_R.Rd
similarity index 86%
rename from man/optimizeALS.Rd
rename to man/runINMF_R.Rd
index 37681b04..624769d0 100644
--- a/man/optimizeALS.Rd
+++ b/man/runINMF_R.Rd
@@ -1,12 +1,12 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/optimizeALS.R
-\name{optimizeALS}
-\alias{optimizeALS}
-\alias{optimizeALS,liger-method}
-\alias{optimizeALS,list-method}
+% Please edit documentation in R/runINMF_R.R
+\name{runINMF_R}
+\alias{runINMF_R}
+\alias{runINMF_R,liger-method}
+\alias{runINMF_R,list-method}
 \title{Perform iNMF on scaled datasets}
 \usage{
-optimizeALS(
+runINMF_R(
   object,
   k,
   lambda = 5,
@@ -16,17 +16,14 @@ optimizeALS(
   H.init = NULL,
   W.init = NULL,
   V.init = NULL,
+  method = c("planc", "liger", "rcppml"),
   useUnshared = FALSE,
   seed = 1,
   readH5 = "auto",
-  verbose = getOption("ligerVerbose"),
-  max.iters = maxIter,
-  use.unshared = useUnshared,
-  rand.seed = seed,
-  print.obj = NULL
+  verbose = getOption("ligerVerbose")
 )
 
-\S4method{optimizeALS}{liger}(
+\S4method{runINMF_R}{liger}(
   object,
   k,
   lambda = 5,
@@ -39,31 +36,23 @@ optimizeALS(
   useUnshared = FALSE,
   seed = 1,
   readH5 = "auto",
-  verbose = getOption("ligerVerbose"),
-  max.iters = maxIter,
-  use.unshared = useUnshared,
-  rand.seed = seed,
-  print.obj = NULL
+  verbose = getOption("ligerVerbose")
 )
 
-\S4method{optimizeALS}{list}(
+\S4method{runINMF_R}{list}(
   object,
   k,
   lambda = 5,
-  thresh = 1e-06,
   maxIter = 30,
   nrep = 1,
   H.init = NULL,
   W.init = NULL,
   V.init = NULL,
+  method = c("planc", "liger", "rcppml"),
   useUnshared = FALSE,
   seed = 1,
   readH5 = "auto",
-  verbose = getOption("ligerVerbose"),
-  max.iters = maxIter,
-  use.unshared = useUnshared,
-  rand.seed = seed,
-  print.obj = NULL
+  verbose = getOption("ligerVerbose")
 )
 }
 \arguments{
@@ -103,6 +92,9 @@ Default \code{NULL}.}
 each element is the initial \eqn{V} matrix of each dataset. Default
 \code{NULL}.}
 
+\item{method}{NNLS subproblem solver. Choose from \code{"liger"} (default
+original implementation), \code{"planc"} or \code{"rcppml"}.}
+
 \item{useUnshared}{Logical, whether to include unshared variable features and
 run optimizeUANLS algorithm. Defaul \code{FALSE}. Running
 \code{\link{selectGenes}} with \code{unshared = TRUE} and then running
@@ -148,5 +140,5 @@ pbmc <- normalize(pbmc)
 pbmc <- selectGenes(pbmc)
 pbmc <- scaleNotCenter(pbmc)
 # Only running a few iterations for fast examples
-pbmc <- optimizeALS(pbmc, k = 20, maxIter = 2)
+pbmc <- runINMF(pbmc, k = 20, maxIter = 2)
 }
diff --git a/man/runOnlineINMF.Rd b/man/runOnlineINMF.Rd
new file mode 100644
index 00000000..bb6f9507
--- /dev/null
+++ b/man/runOnlineINMF.Rd
@@ -0,0 +1,177 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/runOnlineINMF.R
+\name{runOnlineINMF}
+\alias{runOnlineINMF}
+\alias{runOnlineINMF.liger}
+\alias{runOnlineINMF.list}
+\alias{runOnlineINMF.Seurat}
+\title{Perform online iNMF on scaled datasets}
+\usage{
+runOnlineINMF(
+  object,
+  k = 20,
+  lambda = 5,
+  newDatasets = NULL,
+  projection = FALSE,
+  maxEpochs = 5,
+  HALSiter = 1,
+  miniBatchSize = 5000,
+  seed = 123,
+  verbose = getOption("ligerVerbose"),
+  ...
+)
+
+\method{runOnlineINMF}{liger}(
+  object,
+  k = 20,
+  lambda = 5,
+  newDatasets = NULL,
+  projection = FALSE,
+  maxEpochs = 5,
+  HALSiter = 1,
+  miniBatchSize = 5000,
+  seed = 123,
+  verbose = getOption("ligerVerbose"),
+  ...
+)
+
+\method{runOnlineINMF}{list}(
+  object,
+  k = 20,
+  lambda = 5,
+  newDatasets = NULL,
+  projection = FALSE,
+  maxEpochs = 5,
+  WInit = NULL,
+  VInit = NULL,
+  AInit = NULL,
+  BInit = NULL,
+  HALSiter = 1,
+  miniBatchSize = 5000,
+  seed = 123,
+  verbose = getOption("ligerVerbose"),
+  ...
+)
+
+\method{runOnlineINMF}{Seurat}(
+  object,
+  k = 20,
+  lambda = 5,
+  newDatasets = NULL,
+  projection = FALSE,
+  maxEpochs = 5,
+  HALSiter = 1,
+  miniBatchSize = 5000,
+  seed = 123,
+  verbose = getOption("ligerVerbose"),
+  ...
+)
+}
+\arguments{
+\item{object}{\linkS4class{liger} object. Scaled data required.}
+
+\item{k}{Inner dimension of factorization--number of metagenes. A value in
+the range 20-50 works well for most analyses. Default \code{20}.}
+
+\item{lambda}{Regularization parameter. Larger values penalize
+dataset-specific effects more strongly (i.e. alignment should increase as
+lambda increases). We recommend always using the default value except
+possibly for analyses with relatively small differences (biological
+replicates, male/female comparisons, etc.) in which case a lower value such
+as 1.0 may improve reconstruction quality. Default \code{5.0}.}
+
+\item{newDatasets}{New datasets for scenario 2 or scenario 3.}
+
+\item{projection}{Perform data integration with scenario 3. See description.
+Default \code{FALSE}.}
+
+\item{maxEpochs}{The number of epochs to iterate through. See detail.
+Default \code{5}.}
+
+\item{miniBatchSize}{Total number of cells in each minibatch. See detail.
+Default \code{5000}.}
+
+\item{seed}{Random seed to allow reproducible results. Default \code{123}.}
+
+\item{verbose}{Logical. Whether to show information of the progress. Default
+\code{getOption("ligerVerbose")} which is \code{TRUE} if users have not set.}
+
+\item{WInit}{Optional initialization for W. See detail. Default \code{NULL}.}
+
+\item{VInit}{Optional initialization for V. See detail. Default \code{NULL}.}
+
+\item{AInit}{Optional initialization for A. See detail. Default \code{NULL}.}
+
+\item{BInit}{Optional initialization for B. See detail. Default \code{NULL}.}
+
+\item{HALSiters}{Maximum number of block coordinate descent (HALS
+algorithm) iterations to perform for each update of \eqn{W} and \eqn{V}.
+Default \code{1}. Changing this parameter is not recommended.}
+}
+\value{
+\code{object} with \code{W} slot updated with resulting \eqn{W}
+matrix; the \code{H}, \code{V}, \code{A} and \code{B} slots of each
+\linkS4class{ligerDataset} object in \code{datasets} slot is updated with the
+corresponding result matrices.
+}
+\description{
+Perform online integrative non-negative matrix factorization to
+represent multiple single-cell datasets in terms of \eqn{H}, \eqn{W}, and
+\eqn{V} matrices. It optimizes the iNMF objective function using online
+learning (non-negative least squares for H matrix, hierarchical alternating
+least squares for \eqn{W} and \eqn{V} matrices), where the number of factors
+is set by \code{k}. The function allows online learning in 3 scenarios:
+
+\enumerate{
+ \item Fully observed datasets;
+ \item Iterative refinement using continually arriving datasets;
+ \item Projection of new datasets without updating the existing factorization
+}
+
+All three scenarios require fixed memory independent of the number of cells.
+
+For each dataset, this factorization produces an \eqn{H} matrix (k by cell),
+a \eqn{V} matrix (genes by k) \eqn{C^\mathsf{T}C}, and a shared \eqn{W} matrix (genes by k). The
+\eqn{H} matrices represent the cell factor loadings. \eqn{W} is identical
+among all datasets, as it represents the shared components of the metagenes
+across datasets. The \eqn{V} matrices represent the dataset-specific
+components of the metagenes.
+}
+\details{
+For optional initialization, \code{W.init} must be a matrix object with
+number of rows equal to number of variable genes (denoted as \code{g}) and
+number of columns equal to \code{k}. Any of \code{V.init}, \code{A} and
+\code{B} must be a list object of n matrices where n is the number of
+datasets in \code{object}. For \code{V.init}, each matrix should be of size g
+x k. For \code{A.init}, each matrix should be k x k and for \code{B.init},
+each matrix should be g x k.
+
+Minibatch iterations is performed on small subset of cells. The exact
+minibatch size applied on each dataset is \code{miniBatch_size} multiplied by
+the proportion of cells in this dataset out of all cells. The setting of
+\code{miniBatch_size} is by default \code{5000}, which is reasonable.
+However, a smaller value such as \code{1000} may be necessary for analyzing
+very small datasets. In general, \code{miniBatch_size} should be no larger
+than the number of cells in the smallest dataset. An epoch is one completion
+of calculation on all cells after a number of iterations of minibatches.
+Therefore, the total number of iterations is determined by the setting of
+\code{max.epochs}, total number of cells, and \code{miniBatch_size}.
+}
+\examples{
+pbmc <- normalize(pbmc)
+pbmc <- selectGenes(pbmc)
+pbmc <- scaleNotCenter(pbmc)
+# Minibatch size has to be less than number of cell in the smallest dataset
+# Scenario 1
+pbmc <- online_iNMF(pbmc, miniBatch_size = 100)
+# Scenario 2
+# Fake new dataset by increasing all non-zero value in "ctrl" by 1
+ctrl2 <- rawData(dataset(pbmc, "ctrl"))
+ctrl2@x <- ctrl2@x + 1
+colnames(ctrl2) <- paste0(colnames(ctrl2), 2)
+pbmc2 <- online_iNMF(pbmc, k = 20, X_new = list(ctrl2 = ctrl2),
+                     miniBatch_size = 100)
+# Scenario 3
+pbmc3 <- online_iNMF(pbmc, k = 20, X_new = list(ctrl2 = ctrl2),
+                     miniBatch_size = 100, projection = TRUE)
+}
diff --git a/man/runUINMF.Rd b/man/runUINMF.Rd
index efdcae17..5f656cf5 100644
--- a/man/runUINMF.Rd
+++ b/man/runUINMF.Rd
@@ -66,8 +66,8 @@ Default \code{20}.}
 
 \item{nIteration}{Total number of iterations to perform. Default \code{30}.}
 
-\item{nRandomStarts}{Number of restarts to perform (iNMF objective function is
-non-convex, so taking the best objective from multiple successive
+\item{nRandomStarts}{Number of restarts to perform (iNMF objective function
+is non-convex, so taking the best objective from multiple successive
 initializations is recommended). For easier reproducibility, this increments
 the random seed by 1 for each consecutive restart, so future factorizations
 of the same dataset can be run with one rep if necessary. Default \code{1}.}
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 7988da58..9c20dc2a 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -343,18 +343,6 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
-// solveNNLS
-arma::mat solveNNLS(const arma::mat& C, const arma::mat& B);
-RcppExport SEXP _rliger2_solveNNLS(SEXP CSEXP, SEXP BSEXP) {
-BEGIN_RCPP
-    Rcpp::RObject rcpp_result_gen;
-    Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< const arma::mat& >::type C(CSEXP);
-    Rcpp::traits::input_parameter< const arma::mat& >::type B(BSEXP);
-    rcpp_result_gen = Rcpp::wrap(solveNNLS(C, B));
-    return rcpp_result_gen;
-END_RCPP
-}
 // ComputeSNN
 Eigen::SparseMatrix<double> ComputeSNN(Eigen::MatrixXd nn_ranked, double prune);
 RcppExport SEXP _rliger2_ComputeSNN(SEXP nn_rankedSEXP, SEXP pruneSEXP) {
@@ -420,7 +408,6 @@ static const R_CallMethodDef CallEntries[] = {
     {"_rliger2_cluster_vote", (DL_FUNC) &_rliger2_cluster_vote, 2},
     {"_rliger2_scale_columns_fast", (DL_FUNC) &_rliger2_scale_columns_fast, 3},
     {"_rliger2_max_factor", (DL_FUNC) &_rliger2_max_factor, 3},
-    {"_rliger2_solveNNLS", (DL_FUNC) &_rliger2_solveNNLS, 2},
     {"_rliger2_ComputeSNN", (DL_FUNC) &_rliger2_ComputeSNN, 2},
     {"_rliger2_WriteEdgeFile", (DL_FUNC) &_rliger2_WriteEdgeFile, 3},
     {"_rliger2_DirectSNNToFile", (DL_FUNC) &_rliger2_DirectSNNToFile, 4},
diff --git a/src/cpm-package-lock.cmake b/src/cpm-package-lock.cmake
new file mode 100644
index 00000000..67eeaba6
--- /dev/null
+++ b/src/cpm-package-lock.cmake
@@ -0,0 +1,3 @@
+# CPM Package Lock
+# This file should be committed to version control
+
diff --git a/src/entities.h b/src/entities.h
deleted file mode 100755
index eca0df02..00000000
--- a/src/entities.h
+++ /dev/null
@@ -1,472 +0,0 @@
-#pragma once
-
-#include <string>
-#include <vector>
-
-#include <map>
-#if __cplusplus > 199711L
-	#include <unordered_map>
-#endif
-
-#include "parameters.h" 
-
-using std::endl;
-
-typedef struct DenseMatrix
-{
-	dtype **rowmajor,**colmajor;
-	int rows,cols,totalsize;
-	bool dependent;
-	int originalrows,originalcols;
-	DenseMatrix();
-	//DenseMatrix() : rows(0),cols(0),totalsize(0),dependent(false),rowmajor(NULL),colmajor(NULL) {}
-	
-	DenseMatrix(int rows_,int cols_,bool dependent_=false) : 
-		rows(rows_),cols(cols_),totalsize(rows*cols),dependent(dependent_),
-		originalrows(rows_),originalcols(cols_)
-	{
-		rowmajor = new dtype*[rows];
-		colmajor = new dtype*[cols];
-		if (not dependent)
-		{
-			for(int row=0;row<rows;++row) { rowmajor[row] = new dtype[cols](); }
-			for(int col=0;col<cols;++col) { colmajor[col] = new dtype[rows](); }
-		}
-	}
-	DenseMatrix(dtype**colmajor_,int rows_,int cols_,bool dependent_=false) : 
-		colmajor(colmajor_),rows(rows_),cols(cols_),totalsize(rows*cols),dependent(dependent_),
-		originalrows(rows_),originalcols(cols_)
-	{
-		rowmajor = new dtype*[rows];
-		if (not dependent)
-		{
-			for(int row=0;row<rows;++row) { rowmajor[row] = new dtype[cols](); }
-		}
-	}
-	void copyColumnToRow()
-	{
-		for(int col=0;col<cols;++col)
-		{
-			for(int row=0;row<rows;++row)
-			{
-				rowmajor[row][col] = colmajor[col][row];
-			}
-		}
-	}
-	void copyRowToColumn()
-	{
-		for(int row=0;row<rows;++row)
-		{
-			for(int col=0;col<cols;++col)
-			{
-				colmajor[col][row] = rowmajor[row][col];
-			}
-		}
-	}
-	void resetDim(int rows_,int cols_)
-	{
-		rows = rows_;
-		cols = cols_;
-		totalsize = rows*cols;
-	}
-	~DenseMatrix()
-	{
-		if(not dependent)
-		{
-			for(int row=0;row<originalrows;++row) { delete[] rowmajor[row]; }
-			for(int col=0;col<originalcols;++col) { delete[] colmajor[col]; }
-		}
-		delete[] rowmajor;
-		delete[] colmajor;
-	}
-} DenseMatrix;
-
-DenseMatrix operator +(const DenseMatrix& X, const DenseMatrix& Y) {
-	if (X.rows != Y.rows || X.cols != Y.cols)
-	{
-		//mismatched dimensions in matrix addition. Returning X
-		return X;
-	}
-	DenseMatrix Z = DenseMatrix(X.rows,X.cols,true);
-	for(int i=0; i < Z.rows; ++i)
-	{
-		for(int j=0; j < Z.cols; ++j)
-		{
-			Z.rowmajor[i][j] = X.rowmajor[i][j] + Y.rowmajor[i][j];
-		}
-	}
-	Z.copyRowToColumn();
-	return Z;
-}
-
-DenseMatrix operator -(const DenseMatrix& X, const DenseMatrix& Y) {
-	if (X.rows != Y.rows || X.cols != Y.cols)
-	{
-		//Error: mismatched dimensions in matrix substraction. Returning X
-		return X;
-	}
-	DenseMatrix Z = DenseMatrix(X.rows,X.cols,true);
-	for(int i=0; i < Z.rows; ++i)
-	{
-		for(int j=0; j < Z.cols; ++j)
-		{
-			Z.rowmajor[i][j] = X.rowmajor[i][j] - Y.rowmajor[i][j];
-		}
-	}
-	Z.copyRowToColumn();
-	return Z;
-}
-
-DenseMatrix operator *(const DenseMatrix& X, const DenseMatrix& Y) {
-	if (X.cols != Y.rows)
-	{
-		//Error: mismatched dimensions in matrix multiplication. Returning X.
-		return X;
-	}
-	DenseMatrix Z = DenseMatrix(X.rows,Y.cols,true);
-	for(int i = 0; i < X.rows; ++i)
-    	for(int j = 0; j < Y.cols; ++j)
-        	for(int k = 0; k < X.cols; ++k)
-            {
-            	Z.rowmajor[i][j] += X.rowmajor[i][k] * Y.rowmajor[k][j];
-            }
-	Z.copyRowToColumn();
-	return Z;
-}
-
-std::ostream& operator<<(std::ostream& os, const DenseMatrix& X)  
-{  
-    for (int i = 0; i < X.rows; ++i)
-	{
-		for (int j = 0; j < X.cols; ++j)
-		{
-			os << X.rowmajor[i][j] << " ";
-		}
-		os << endl;
-	}
-	return os;  
-}
-
-typedef struct LowerTriangularMatrix
-{
-	dtype* rowmajor;//serialized
-	int dim,totalsize;
-	
-	LowerTriangularMatrix()
-	{
-		dim = 1;
-		totalsize = ((dim*(dim+1))/2);
-		rowmajor = new dtype[totalsize]();
-	}
-	
-	LowerTriangularMatrix(int dim_) : dim(dim_),totalsize((dim_*(dim_+1))/2)
-	{
-		rowmajor = new dtype[totalsize]();
-	}
-	~LowerTriangularMatrix()
-	{
-		delete[] rowmajor;
-	}
-} LowerTriangularMatrix;
-
-std::ostream& operator<<(std::ostream& os, const LowerTriangularMatrix& X)  
-{  
-    int ind = 0;
-	for (int i = 0; i < X.dim; ++i)
-	{
-		for (int j = 0; j <=i; ++j)
-		{
-			os << X.rowmajor[ind] << " ";
-			++ind;
-		}
-		os << endl;
-	}
-	return os;  
-}
-
-typedef struct Mask
-{
-	bool* value;
-	int dim;
-	std::string tmp; 
-	std::vector<char> vs;
-	Mask() : value(NULL),dim(0) {}
-	Mask(int dim_,bool init) : dim(dim_)
-	{
-		value = new bool[dim]();
-		for(int i=0;i<dim;++i) value[i] = init;
-		tmp.reserve(dim);
-		vs.reserve(dim);
-	}
-	~Mask()
-	{
-		delete[] value;
-	}
-} Mask;
-
-typedef struct NNLS_Single_Input
-{//Cx = b
-	DenseMatrix *C;
-	dtype *x,*b;
-	int max_iter;
-	NNLS_Single_Input(DenseMatrix* C_,dtype* b_,int max_iter_) : C(C_),b(b_),max_iter(max_iter_)
-	{
-		x = new dtype[C->cols]();
-	}
-	~NNLS_Single_Input()
-	{
-		delete[] x;
-	}
-} NNLS_Single_Input;
-
-typedef struct NNLS_Single_State
-{//Cx = b, min norm Cx-b
-	DenseMatrix *C_xmask,*C_ymask;
-	Mask *xmask,*infeasiblemask;
-	dtype *y_masked,*x_masked,*y_masked_intermediate;
-	int full_exchange_buffer,infeasible,lowest_infeasible;
-	int iterations;
-	bool full_exchange_mode;
-	NNLS_Single_State(int rows,int cols)
-	{
-		xmask = new Mask(cols,false);
-		infeasiblemask = new Mask(cols,false);
-		C_xmask = new DenseMatrix(rows,cols,true);
-		C_xmask->cols = 0;
-		C_ymask = new DenseMatrix(rows,cols,true);
-		x_masked = new dtype[cols]();
-		y_masked = new dtype[cols]();
-		y_masked_intermediate = new dtype[rows]();
-		full_exchange_buffer = FULL_EXCHANGE_BUFFER;
-		full_exchange_mode = true;
-		lowest_infeasible = cols+1;
-		iterations = 0;
-	}
-	~NNLS_Single_State()
-	{
-		delete C_xmask;
-		delete C_ymask;
-		delete xmask;
-		delete infeasiblemask;
-		delete[] x_masked;
-		delete[] y_masked;
-		delete[] y_masked_intermediate;
-	}
-} NNLS_Single_State;
-
-
-#if __cplusplus > 199711L
-	// typedef std::unordered_map<std::string,LowerTriangularMatrix*> CholeskyMap;
-	typedef std::map<std::string,LowerTriangularMatrix*> CholeskyMap;
-	// typedef std::unordered_map<std::vector<int>,LowerTriangularMatrix*> CholeskyMap2;
-#else
-	typedef std::map<std::string,LowerTriangularMatrix*> CholeskyMap;
-	// typedef std::map<std::vector<int>,LowerTriangularMatrix*> CholeskyMap2;
-#endif
-
-inline void destroyCholeskyMap(CholeskyMap& cm)
-{
-	for(CholeskyMap::iterator it = cm.begin(); it != cm.end(); ++it)
-	{
-
-		delete it->second;
-	}
-	cm.clear();
-}
-// void destroyCholeskyMap2(CholeskyMap2& cm)
-// {
-// 	for(CholeskyMap2::iterator it = cm.begin(); it != cm.end(); ++it)
-// 	{
-// 		delete it->second;
-// 	}
-// 	cm.clear();
-// }
-
-
-
-typedef struct NNLS_Multiple_State
-{//CX = B, min norm CX-B
-	Mask **xmasks,**infeasiblemasks;
-	int *full_exchange_buffer;
-	int *lowest_infeasible;
-	bool *full_exchange_mode;
-	int *infeasible;
-	int iterations;
-	const int cols,cols_rhs;
-	LowerTriangularMatrix **G;
-	CholeskyMap choleskyMap;
-	// CholeskyMap2 choleskyMap2;
-
-	LowerTriangularMatrix **CFTCF;
-	DenseMatrix **CGTCF;
-	dtype **y_masked,**x_masked,**CGTb;
-
-	int totalfeasible;
-
-	NNLS_Multiple_State(int cols_, int cols_rhs_) : cols(cols_),cols_rhs(cols_rhs_)
-	{
-		xmasks = new Mask*[cols_rhs];
-		for(int i=0;i<cols_rhs;++i) xmasks[i] = new Mask(cols,false);
-		infeasiblemasks = new Mask*[cols_rhs];
-		for(int i=0;i<cols_rhs;++i) infeasiblemasks[i] = new Mask(cols,false);
-		
-		full_exchange_buffer = new int[cols_rhs];
-		for(int i=0;i<cols_rhs;++i) full_exchange_buffer[i] = FULL_EXCHANGE_BUFFER;
-		lowest_infeasible = new int[cols_rhs];
-		for(int i=0;i<cols_rhs;++i) lowest_infeasible[i] = cols + 1;
-		full_exchange_mode = new bool[cols_rhs];
-		for(int i=0;i<cols_rhs;++i) full_exchange_mode[i] = true;
-		infeasible = new int[cols_rhs];
-		for(int i=0;i<cols_rhs;++i) infeasible[i] = cols+1;
-
-		CFTCF = new LowerTriangularMatrix*[cols_rhs];
-		for(int i=0;i<cols_rhs;++i) CFTCF[i] = new LowerTriangularMatrix(cols);
-		CGTCF = new DenseMatrix*[cols_rhs];
-		for(int i=0;i<cols_rhs;++i) CGTCF[i] = new DenseMatrix(cols,cols);
-		y_masked = new dtype*[cols_rhs];
-		for(int i=0;i<cols_rhs;++i) y_masked[i] = new dtype[cols];
-		x_masked = new dtype*[cols_rhs];
-		for(int i=0;i<cols_rhs;++i) x_masked[i] = new dtype[cols];
-		CGTb = new dtype*[cols_rhs];
-		for(int i=0;i<cols_rhs;++i) CGTb[i] = new dtype[cols];
-
-		G = new LowerTriangularMatrix*[cols_rhs];
-
-		totalfeasible = 0;
-	}
-	~NNLS_Multiple_State()
-	{
-		for(int i=0;i<cols_rhs;++i)
-		{
-			delete CFTCF[i];
-			delete CGTCF[i];
-			delete xmasks[i];
-			delete infeasiblemasks[i];
-			delete[] x_masked[i];
-			delete[] y_masked[i];
-			delete[] CGTb[i];
-		}
-
-		delete[] xmasks;
-		delete[] infeasiblemasks;
-
-		delete[] full_exchange_buffer;
-		delete[] lowest_infeasible;
-		delete[] full_exchange_mode;
-		delete[] infeasible;
-		
-		delete[] CFTCF;
-		delete[] CGTCF;
-		delete[] x_masked;
-		delete[] y_masked;
-		delete[] CGTb;
-
-		delete[] G;
-
-		destroyCholeskyMap(choleskyMap);
-		// destroyCholeskyMap2(choleskyMap2);
-
-	}
-} NNLS_Multiple_State;
-
-
-
-
-typedef struct NNLS_Multiple_Input
-{//CX = B
-	LowerTriangularMatrix *CTC;
-	dtype **X,**CTB;
-	const int cols_rhs;
-	const int max_iter;
-	bool allocateX;
-	double allocate_time,init_time,
-		advance_time,switch_time,determine_time,
-		apply_time,normal_time,generateCGTCF_time,
-		matvec_time,generateCGTb_time,vector_time,mark_time,overwrite_time;
-	double choleskyCFTCF_time,choleskyFactorization_time,choleskyMapInsert_time,choleskyCheck_time,
-		choleskyAllocate_time,choleskyKeyFind_time,choleskyMaskToString_time;
-	NNLS_Multiple_State* state;
-	NNLS_Multiple_Input(LowerTriangularMatrix* CTC_,dtype**X_,dtype**CTB_,int cols_rhs_,int max_iter_) : 
-		CTC(CTC_),X(X_),CTB(CTB_),cols_rhs(cols_rhs_),max_iter(max_iter_),
-		allocate_time(0),init_time(0),
-		advance_time(0),switch_time(0),determine_time(0),
-		apply_time(0),normal_time(0),generateCGTCF_time(0),
-		matvec_time(0),generateCGTb_time(0),vector_time(0),mark_time(0),overwrite_time(0),
-		choleskyCFTCF_time(0),choleskyFactorization_time(0),choleskyMapInsert_time(0),choleskyCheck_time(0),
-		choleskyAllocate_time(0),choleskyKeyFind_time(0),choleskyMaskToString_time(0)
-	{
-		allocateX = false;
-		state = new NNLS_Multiple_State(CTC->dim,cols_rhs);
-	}
-	NNLS_Multiple_Input(LowerTriangularMatrix* CTC_,dtype**CTB_,int cols_rhs_,int max_iter_) : 
-		CTC(CTC_),CTB(CTB_),cols_rhs(cols_rhs_),max_iter(max_iter_),
-		allocate_time(0),init_time(0),
-		advance_time(0),switch_time(0),determine_time(0),
-		apply_time(0),normal_time(0),generateCGTCF_time(0),
-		matvec_time(0),generateCGTb_time(0),vector_time(0),mark_time(0),overwrite_time(0),
-		choleskyCFTCF_time(0),choleskyFactorization_time(0),choleskyMapInsert_time(0),choleskyCheck_time(0),
-		choleskyAllocate_time(0),choleskyKeyFind_time(0),choleskyMaskToString_time(0)
-	{
-		allocateX = true;
-		X = new dtype*[cols_rhs];
-		for(int i=0;i<cols_rhs;++i) X[i] = new dtype[CTC->dim];
-		state = new NNLS_Multiple_State(CTC->dim,cols_rhs);
-	}
-	~NNLS_Multiple_Input()
-	{
-		if(allocateX)
-		{
-			for(int i=0;i<cols_rhs;++i) delete[] X[i];
-			delete[] X;
-		}
-		delete state;
-	}
-} NNLS_Multiple_Input;
-
-
-
-
-
-
-
-typedef struct NMF_Input
-{//A: m * n, W : m * k, H: k * n
-	DenseMatrix *W,*H,*A;
-	int m,k,n;//data size, dimensions, data points
-	int max_iter_nmf,max_iter_nnls;
-	NMF_Input(DenseMatrix*W_,DenseMatrix*H_,DenseMatrix*A_,int max_iter_nmf_,int max_iter_nnls_) : 
-		W(W_),H(H_),A(A_),max_iter_nmf(max_iter_nmf_),max_iter_nnls(max_iter_nnls_)
-	{
-		m = A->rows;
-		k = W->cols;
-		n = H->cols;
-	}
-	~NMF_Input()
-	{
-	}
-} NMF_Input;
-
-typedef struct NMF_State
-{//A: m * n, W : m * k, H: k * n
-	LowerTriangularMatrix *HHT,*WTW;
-	dtype **WTA,**HAT;
-	int m,k,n;//data size, dimensions, data points
-	NMF_State(int m_,int k_,int n_) : m(m_),k(k_),n(n_) 
-	{
-		HHT = new LowerTriangularMatrix(k);
-		WTW = new LowerTriangularMatrix(k);
-		WTA = new dtype*[n];
-		for(int col=0;col<n;++col) WTA[col] = new dtype[k];
-		HAT = new dtype*[m];
-		for(int col=0;col<m;++col) HAT[col] = new dtype[k];
-	}
-	~NMF_State()
-	{
-		delete HHT;
-		delete WTW;
-		for(int i=0;i<n;++i) delete[] WTA[i];
-		delete[] WTA;
-		for(int i=0;i<m;++i) delete[] HAT[i];
-		delete[] HAT;
-	}
-} NMF_State;
diff --git a/src/factorization.h b/src/factorization.h
deleted file mode 100755
index b3aafe0f..00000000
--- a/src/factorization.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#pragma once
-
-#include "parameters.h"
-#include "vector.h"
-#include "entities.h"
-
-void cholesky_lowertriangular_cpu(LowerTriangularMatrix& G, LowerTriangularMatrix& C)
-{
-	int row = 0;
-	int startindex = 0;
-	while(startindex < C.totalsize)
-	{
-		int diagonalindex = startindex+row;
-		dtype sqsum = 0;
-		for(int col=0;col<row;++col)
-		{
-			int crossindex = (col*(col+1))/2;
-			dtype crosssum = productsum(G.rowmajor+crossindex,G.rowmajor+startindex,col);
-			int currentcolindex = startindex+col;
-			int crossdiagonalindex = crossindex+col;
-			G.rowmajor[currentcolindex] =
-				1.0/G.rowmajor[crossdiagonalindex]
-				*(C.rowmajor[currentcolindex] - crosssum);
-			sqsum += G.rowmajor[currentcolindex]*G.rowmajor[currentcolindex];
-		}
-		G.rowmajor[diagonalindex]=sqrt(C.rowmajor[diagonalindex]-sqsum);
-
-		++row;
-		startindex = (row*(row+1))/2;
-	}
-}
diff --git a/src/ls.h b/src/ls.h
deleted file mode 100755
index cad14a8e..00000000
--- a/src/ls.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#pragma once
-
-#include "matrix.h"
-#include "factorization.h"
-#include <map>
-#include <string>
-
-void normal_equations_cpu(DenseMatrix& C,dtype*x,dtype*b)
-{
-	//Algorithm from 5.3.2 (pg. 262) in Golub and Van Loan.
-	LowerTriangularMatrix G = LowerTriangularMatrix(C.cols);
-	LowerTriangularMatrix CTC = LowerTriangularMatrix(C.cols);
-
-	matmult_ata_lowertriangular_cpu(CTC,C);
-	matvecmult_transpose_cpu(C,b,x);
-	cholesky_lowertriangular_cpu(G,CTC);
-	
-	forwardsubstitution(G,x);
-	backsubstitution(G,x);
-
-}
-
-void normal_equations_precomputedCholesky_cpu(LowerTriangularMatrix& G,dtype*x)
-{//x needs to be initialized with appropriately masked CTb.
-	forwardsubstitution(G,x);
-	backsubstitution(G,x);
-}
-
-void normal_equations_precomputedCholesky_cpu_test(LowerTriangularMatrix& G,dtype*x)
-{//x needs to be initialized with appropriately masked CTb.
-	forwardsubstitution(G,x);
-	backsubstitution(G,x);
-}
-
-
diff --git a/src/mask.h b/src/mask.h
deleted file mode 100755
index dbdb6778..00000000
--- a/src/mask.h
+++ /dev/null
@@ -1,268 +0,0 @@
-#pragma once
-
-#include <sstream>
-#include <vector>
-
-#include "entities.h"
-
-void applyColumnMask(DenseMatrix& original,DenseMatrix& masked,Mask& mask,bool toggle=false)
-{
-	masked.cols = 0;
-	masked.totalsize = 0;
-	for(int i=0;i<mask.dim;++i)
-	{
-		if( mask.value[i] xor toggle )
-		{
-			// printf("masked index: %d\n",i);
-			masked.colmajor[masked.cols] = original.colmajor[i];
-			++masked.cols;
-		}
-	}
-	masked.totalsize = masked.cols*masked.rows;
-}
-void applyVectorMask(dtype*original,dtype*masked,Mask& mask,bool toggle=false)
-{
-	int counter = 0;
-	for(int i=0;i<mask.dim;++i)
-	{
-		if( mask.value[i] xor toggle )
-		{
-			masked[counter] = original[i];
-			++counter;
-		}
-	}
-}
-void overwriteOriginalWithMask(dtype*original,dtype*masked,Mask& mask)
-{
-	int counter = 0;
-	for(int i=0;i<mask.dim;++i)
-	{
-		if( mask.value[i] )
-		{
-			original[i] = masked[counter];
-			++counter;
-		}
-		else
-		{
-			original[i] = 0;
-		}
-	}
-}
-int markInfeasible(Mask& infeasiblemask,dtype*x_masked,dtype*y_masked,Mask& xmask)
-{
-	int xcounter = 0;
-	int ycounter = 0;
-	int infeasible = 0;
-	for(int i=0;i<xmask.dim;++i)
-	{
-		if( xmask.value[i] )
-		{
-			infeasiblemask.value[i] = x_masked[xcounter] < 0;
-			++xcounter;
-		}
-		else
-		{
-			infeasiblemask.value[i] = y_masked[ycounter] < 0;
-			++ycounter;
-		}
-
-		if ( infeasiblemask.value[i] ) { ++infeasible; }
-	}
-	return infeasible;
-}
-
-void switchSets(Mask& infeasiblemask,Mask& xmask,bool bpp)
-{
-	for(int i=xmask.dim-1;i>=0;--i)
-	{
-		if( infeasiblemask.value[i] )
-		{
-			xmask.value[i] = not xmask.value[i];
-			if( not bpp ) return;
-		}
-	}
-}
-
-void generateCGTCF(DenseMatrix& CGTCF,LowerTriangularMatrix& CTC,Mask& xmask)
-{
-	//int rowmap[xmask.dim];
-	//int colmap[xmask.dim];
-	std::vector<int> rowmap(xmask.dim);
-	std::vector<int> colmap(xmask.dim);
-	int newrows = 0;
-	for(int row = 0; row < xmask.dim; ++row)
-	{
-		if( not xmask.value[row] )
-		{
-			rowmap[newrows] = row;
-			++newrows;
-		}
-	}
-	CGTCF.rows = newrows;
-	int newcols = 0;
-	for(int col = 0; col < xmask.dim; ++col)
-	{
-		if( xmask.value[col] )
-		{
-			colmap[newcols] = col;
-			++newcols;
-		}
-	}
-	CGTCF.cols = newcols;
-
-	CGTCF.totalsize = CGTCF.rows*CGTCF.cols;
-
-	for(int row = 0; row < CGTCF.rows; ++row)
-	{
-		for(int col = 0; col < CGTCF.cols; ++col )
-		{
-			int oldrow = rowmap[row];
-			int oldcol = colmap[col];
-			int index = (oldcol > oldrow) ? ((oldcol*(oldcol+1))/2+oldrow) : ((oldrow*(oldrow+1))/2+oldcol);
-			CGTCF.colmajor[col][row] = CTC.rowmajor[index];
-		}
-	}
-}
-void generateCFTCF(LowerTriangularMatrix& CFTCF,LowerTriangularMatrix& CTC,Mask& columnmask)
-{
-	//int colmap[columnmask.dim];
-	std::vector<int> colmap(columnmask.dim);
-	int newdim = 0;
-	for(int i = 0; i < columnmask.dim; ++i)
-	{
-		if( columnmask.value[i] )
-		{
-			colmap[newdim] = i;
-			++newdim;
-		}
-	}
-	CFTCF.dim = newdim;
-	CFTCF.totalsize = (newdim*(newdim+1))/2;
-
-	for(int row = 0; row < CFTCF.dim; ++row)
-	{
-		int index = (row*(row+1))/2;
-		for(int col = 0; col <= row; ++col )
-		{
-			int oldrow = colmap[row];
-			int oldcol = colmap[col];
-			int oldindex = (oldcol > oldrow) ? ((oldcol*(oldcol+1))/2+oldrow) : ((oldrow*(oldrow+1))/2+oldcol);
-			CFTCF.rowmajor[index + col] = CTC.rowmajor[oldindex];
-
-		}
-	}
-}
-void generateCGTb(dtype* CTb,dtype* CGTb,Mask& xmask)
-{
-	int newdim = 0;
-	for(int i = 0; i < xmask.dim; ++i)
-	{
-		if( not xmask.value[i] )
-		{
-			CGTb[newdim] = CTb[i];
-			++newdim;
-		}
-	}
-}
-std::string maskToString(Mask& mask)
-{
-	std::stringstream stream;
-	// std::string key = "";
-	for(int i=0;i<mask.dim;++i)
-	{
-		if( mask.value[i] )
-		{
-			stream << i << "_";
-		}
-	}
-	return stream.str();
-}
-void maskToString3(Mask& mask)
-{
-	std::stringstream stream;
-	mask.tmp = "";
-	for(int i=0;i<mask.dim;++i)
-	{
-		if( mask.value[i] )
-		{
-			stream << i << "_";
-		}
-	}
-	mask.tmp = stream.str();
-}
-void num2string_reverse(int num,std::string& str)
-{
-	while(num > 0)
-	{
-		str += (char)((num%10) + 48);
-		num/=10;
-	}
-}
-void maskToString4(Mask& mask)
-{
-	mask.tmp = "";
-	for(int i=0;i<mask.dim;++i)
-	{
-		if( mask.value[i] )
-		{
-			num2string_reverse(i,mask.tmp);
-			mask.tmp+='_';
-		}
-	}
-}
-void num2vec_reverse(int num,std::vector<char>& vec)
-{
-	while(num > 0)
-	{
-		vec.push_back( (char)((num%10) + 48) );
-		num/=10;
-	}
-}
-void maskToString5(Mask& mask)
-{
-	mask.vs.clear();
-	for(int i=0;i<mask.dim;++i)
-	{
-		if( mask.value[i] )
-		{
-			num2vec_reverse(i,mask.vs);
-			mask.vs.push_back('_');
-		}
-	}
-	mask.vs.push_back('\0');
-}
-// void maskToString2(Mask& mask,std::string& key)
-// {
-// 	// std::stringstream stream;
-// 	key.reserve(5*mask.dim);
-// 	key = "";
-// 	for(int i=0;i<mask.dim;++i)
-// 	{
-// 		if( mask.value[i] )
-// 		{
-// 			// stream << i;
-// 			// key += stream.str() + "_";
-// 			// stream.str(std::string());
-// 			char num[11];
-// 			key += itoa(i,num,16);
-// 			key += "_";
-// 			// sprintf(num,"%d",i);
-// 			// key += num;
-// 			// key.append(itoa(i,num,36));
-// 			// key.append("_");
-// 		}
-// 	}
-// 	//key = key.substr(0, key.size()-1);
-// }
-
-void maskToVector(Mask& mask,std::vector<int>& key)
-{
-	for(int i=0;i<mask.dim;++i)
-	{
-		if( mask.value[i] )
-		{
-			key.push_back(i);
-		}
-	}
-}
-
diff --git a/src/matrix.h b/src/matrix.h
deleted file mode 100755
index cfe7285e..00000000
--- a/src/matrix.h
+++ /dev/null
@@ -1,152 +0,0 @@
-#pragma once
-
-#include <cstdlib>
-
-#include "vector.h"
-#include "entities.h"
-
-void forwardsubstitution(LowerTriangularMatrix& G,dtype*rhs)
-{//rhs is overwritten with the answer.
-	for (int row = 0; row < G.dim; ++row)
-	{
-		int startindex = (row*(row+1))/2;
-		dtype rowsum = 0;
-		for(int col = 0; col < row; ++col) rowsum += G.rowmajor[startindex + col]*rhs[col];
-		int diagonalindex = startindex + row;
-		rhs[row] = ( rhs[row] - rowsum ) / G.rowmajor[diagonalindex];
-	}
-}
-void backsubstitution(LowerTriangularMatrix& G,dtype*rhs)
-{//column-oriented back substitutes from the transpose of G.
-	//rhs is overwritten with the answer.
-	for(int col = G.dim-1; col >= 0; --col)
-	{
-		int startindex = (col*(col+1))/2;
-		int diagonalindex = startindex+col;
-		rhs[col] = rhs[col] / G.rowmajor[diagonalindex];
-		for(int row=0;row < col;++row) rhs[row] = rhs[row] - rhs[col]*G.rowmajor[startindex+row];
-	}
-}
-void matmult_ata_lowertriangular_cpu(
-	LowerTriangularMatrix& C, DenseMatrix& A)
-{//computes C=ATA from A.
-	for(int Crow=0;Crow<C.dim;++Crow)
-	{
-		int Cstart = (Crow*(Crow+1))/2;
-		for(int Ccol=0;Ccol<=Crow;++Ccol)
-		{
-			int Cindex = Cstart+Ccol;
-			C.rowmajor[Cindex] = vectordot(A.colmajor[Crow],A.colmajor[Ccol],A.rows);
-		}
-	}
-}
-void matmult_ata_lowertriangular_pointers_cpu(
-	LowerTriangularMatrix& C, dtype** A_colmajor,int A_rows)
-{//computes C=ATA from A.
-	for(int Crow=0;Crow<C.dim;++Crow)
-	{
-		int Cstart = (Crow*(Crow+1))/2;
-		for(int Ccol=0;Ccol<=Crow;++Ccol)
-		{
-			int Cindex = Cstart+Ccol;
-			C.rowmajor[Cindex] = vectordot(A_colmajor[Crow],A_colmajor[Ccol],A_rows);
-		}
-	}
-}
-void matvecmult_transpose_cpu(DenseMatrix& A,dtype*x,dtype*b,int factor=1)
-{//Multiplies A^T * x = b.
-	for(int brow=0;brow<A.cols;++brow)
-	{
-		b[brow] = factor*vectordot(A.colmajor[brow],x,A.rows);
-	}
-}
-
-void matvecmult_colmajor_cpu(DenseMatrix& A,dtype*x,dtype*b,int factor=1)
-{//Multiplies A * x = b, column-major on A.
-	for(int row=0;row<A.rows;++row) b[row] = 0;
-	for(int col=0;col<A.cols;++col)
-	{
-		vectoradd(A.colmajor[col],b,A.rows,factor*x[col]);
-		// std::cin.ignore();
-	}
-}
-void matmatmult_colmajor_cpu(DenseMatrix& A,DenseMatrix& B,DenseMatrix& C)
-{//Multiplies A * x = b, column-major on A.
-	if( A.rows != C.rows or B.cols != C.cols or A.cols != B.rows ) 
-	{
-		//error	
-	}
-	for(int bcol=0;bcol<B.cols;++bcol)
-	{
-		for(int crow=0;crow<C.rows;++crow) C.colmajor[bcol][crow] = 0;
-		matvecmult_colmajor_cpu(A,B.colmajor[bcol],C.colmajor[bcol]);
-	}
-}
-/* void randInit(dtype**colmajor,int rows,int cols,int max)
-{
-	for(int col=0;col<cols;++col)
-	{
-		for(int row=0;row<rows;++row)
-		{
-			colmajor[col][row] = rand()%max;
-		}
-	}
-} */
-
-dtype FrobeniusNorm(DenseMatrix* A)
-{
-	dtype sum = 0;
-	for(int row = 0;row<A->rows;++row)
-	{
-		for(int col = 0;col<A->cols;++col)
-		{
-			sum += A->colmajor[col][row]*A->colmajor[col][row];
-		}
-	}
-	return sqrt(sum);
-}
-
-dtype sparsity(DenseMatrix* A)
-{
-	int zeros = 0;
-	for(int row = 0;row<A->rows;++row)
-	{
-		for(int col = 0;col<A->cols;++col)
-		{
-			if(A->colmajor[col][row] == 0) ++zeros;
-		}
-	}
-	return (dtype)zeros/(dtype)(A->cols*A->rows);
-}
-void check_colmajor(dtype**standard,dtype**check,int cols,int rows,dtype tol=1e-5)
-{
-	int numwrong = 0;
-	for(int col=0;col<cols;++col)
-	{
-		for(int row = 0;row < rows;++row)
-		{
-			if(fabs(standard[col][row]-check[col][row])/standard[col][row] > tol )
-			{
-				++numwrong;
-			}
-		}
-	}
-	if(numwrong > 0)
-	{
-		//uh oh 
-	}
-	else
-	{
-		//ok
-	}
-}
-void copy_colmajor(dtype**from,dtype**to,int cols,int rows)
-{
-	for(int col=0;col<cols;++col)
-	{
-		for(int row = 0;row < rows;++row)
-		{
-			to[col][row] = from[col][row];
-		}
-	}
-}
diff --git a/src/nnls.h b/src/nnls.h
deleted file mode 100755
index 6be0569c..00000000
--- a/src/nnls.h
+++ /dev/null
@@ -1,330 +0,0 @@
-#pragma once
-
-#include <math.h>
-#include <ctime>
-#include <vector>
-
-#include "ls.h"
-#include "mask.h"
-
-void advanceInfeasibilityState_single(NNLS_Single_State& state)
-{
-	if( state.infeasible < state.lowest_infeasible ) 
-	{
-		state.lowest_infeasible = state.infeasible;
-		state.full_exchange_buffer = FULL_EXCHANGE_BUFFER;
-		state.full_exchange_mode = true;
-	}
-	else
-	{
-		--state.full_exchange_buffer;
-		if ( state.full_exchange_buffer <= 0 )
-		{
-			state.full_exchange_mode = false;
-		}
-	}
-}
-
-int nnls_single_cpu(NNLS_Single_Input& input)
-{//nonnegative least squares of Cx - b
-	int cols = input.C->cols;
-	int rows = input.C->rows;
-	NNLS_Single_State state = NNLS_Single_State(rows,cols);
-
-	matvecmult_transpose_cpu(*input.C,input.b,state.y_masked,-1);
-	state.infeasible = markInfeasible(*state.infeasiblemask,state.x_masked,state.y_masked,*state.xmask);
-	while( state.infeasible > 0 and state.iterations < input.max_iter )
-	{
-		advanceInfeasibilityState_single(state);
-
-		switchSets(*state.infeasiblemask,*state.xmask,state.full_exchange_mode);
-		// printf("infeasiblemask: %d,%d,%d,%d\n",state.infeasiblemask->value[0],state.infeasiblemask->value[1],
-		// 	state.infeasiblemask->value[2],state.infeasiblemask->value[3]);
-		applyColumnMask(*input.C,*state.C_xmask,*state.xmask);
-		applyColumnMask(*input.C,*state.C_ymask,*state.xmask,true);
-		
-		normal_equations_cpu(*state.C_xmask,state.x_masked,input.b);
-		// printf("x_masked, xmask: %f,%f,%f,%f; %d,%d,%d,%d\n",state.x_masked[0],state.x_masked[1],state.x_masked[2],state.x_masked[3],
-		// 	state.xmask->value[0],state.xmask->value[1],state.xmask->value[2],state.xmask->value[3]);
-
-		matvecmult_colmajor_cpu(*state.C_xmask,state.x_masked,state.y_masked_intermediate);
-		vectorsub(input.b,state.y_masked_intermediate,rows);
-		matvecmult_transpose_cpu(*state.C_ymask,state.y_masked_intermediate,state.y_masked);
-		// printf("y_masked: %f,%f,%f,%f\n",state.y_masked[0],state.y_masked[1],state.y_masked[2],state.y_masked[3]);
-		
-		state.infeasible = markInfeasible(*state.infeasiblemask,state.x_masked,state.y_masked,*state.xmask);
-		++state.iterations;
-	}
-
-	overwriteOriginalWithMask(input.x,state.x_masked,*state.xmask);
-
-	return state.iterations;
-}
-
-void initialize_multiple_cpu(NNLS_Multiple_Input& input,NNLS_Multiple_State& state)
-{
-	for(int i=0;i<state.cols_rhs;++i)
-	{
-		vectorinit(state.x_masked[i],state.cols,0);
-		vectornegate(input.CTB[i],state.y_masked[i],state.cols);
-		state.full_exchange_buffer[i] = FULL_EXCHANGE_BUFFER;
-		state.lowest_infeasible[i] = state.cols + 1;
-		state.full_exchange_mode[i] = true;
-		state.infeasible[i] = state.cols+1;
-		for(int j=0;j<state.xmasks[i]->dim;++j) state.xmasks[i]->value[j] = false;
-		for(int j=0;j<state.infeasiblemasks[i]->dim;++j) state.infeasiblemasks[i]->value[j] = false;
-		state.CFTCF[i]->dim = state.CGTCF[i]->originalcols;
-		state.CFTCF[i]->totalsize = (state.CGTCF[i]->originalcols*(state.CGTCF[i]->originalcols+1))/2;
-		state.CGTCF[i]->rows = state.CGTCF[i]->originalrows;
-		state.CGTCF[i]->cols = state.CGTCF[i]->originalcols;
-		state.CGTCF[i]->totalsize = state.CGTCF[i]->originalrows*state.CGTCF[i]->originalcols;
-		//printf("%f,%f,%f,%f\n",state.y_masked[i][0],state.y_masked[i][1],state.y_masked[i][2],state.y_masked[i][3]);
-	}
-	state.totalfeasible = 0;
-	state.iterations = 0;
-	destroyCholeskyMap(state.choleskyMap);
-}
-void markInfeasible_multiple_cpu(NNLS_Multiple_State& state)
-{
-	for(int i=0;i<state.cols_rhs;++i)
-	{
-		if(state.infeasible[i] > 0)
-		{
-			state.infeasible[i] = markInfeasible(*state.infeasiblemasks[i],state.x_masked[i],state.y_masked[i],*state.xmasks[i]);
-			if(state.infeasible[i] == 0) ++state.totalfeasible;
-		}
-	}
-}
-
-void determineCholeskyFactors_cpu(NNLS_Multiple_Input& input,NNLS_Multiple_State& state)
-{
-	for(int i=0;i<state.cols_rhs;++i)
-	{
-		if(state.infeasible[i] > 0)
-		{
-			std::string key = maskToString(*state.xmasks[i]);
-			CholeskyMap::iterator it = state.choleskyMap.find(key);
-			if( it != state.choleskyMap.end() )
-			{
-				state.G[i] = it->second;
-			}
-			else
-			{
-				generateCFTCF(*state.CFTCF[i],*input.CTC,*state.xmasks[i]);
-				LowerTriangularMatrix* G = new LowerTriangularMatrix(state.CFTCF[i]->dim);
-
-				cholesky_lowertriangular_cpu(*G,*state.CFTCF[i]);
-				state.G[i] = G;
-				std::pair<CholeskyMap::iterator,bool> ret = 
-					state.choleskyMap.insert ( std::pair<std::string,LowerTriangularMatrix*>(key,G) );
-				if ( not ret.second ) 
-				{
-					//ERROR! Duplicate Cholesky factor was inserted
-				}
-				
-			}
-		}
-	}
-}
-void determineCholeskyFactors_cpu_profile(NNLS_Multiple_Input& input,NNLS_Multiple_State& state)
-{
-	std::clock_t start;
-	std::clock_t checkStart = std::clock();
-	// #pragma omp parallel for
-	for(int i=0;i<state.cols_rhs;++i)
-	{
-		if(state.infeasible[i] > 0)
-		{
-			start = std::clock();
-			// maskToString3(*state.xmasks[i]);
-			// std::string& key = state.xmasks[i]->tmp;
-			// maskToString4(*state.xmasks[i]);std::string& key = state.xmasks[i]->tmp;
-			maskToString5(*state.xmasks[i]);std::string key = &(*(state.xmasks[i]->vs.begin()));
-			// std::string key; maskToString2(*state.xmasks[i],key);
-			// std::vector<int> key; maskToVector(*state.xmasks[i],key);
-			input.choleskyMaskToString_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-			
-			start = std::clock();
-			CholeskyMap::iterator it = state.choleskyMap.find(key);
-			// CholeskyMap2::iterator it = state.choleskyMap2.find(key);
-			input.choleskyKeyFind_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-
-			// if( it != state.choleskyMap2.end() ) state.G[i] = it->second;
-			if( it != state.choleskyMap.end() ) state.G[i] = it->second;
-			else
-			{
-				start = std::clock();
-				generateCFTCF(*state.CFTCF[i],*input.CTC,*state.xmasks[i]);
-				input.choleskyCFTCF_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-
-				start = std::clock();
-				LowerTriangularMatrix* G = new LowerTriangularMatrix(state.CFTCF[i]->dim);
-				input.choleskyAllocate_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-				
-				start = std::clock();
-				cholesky_lowertriangular_cpu(*G,*state.CFTCF[i]);
-				input.choleskyFactorization_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-
-				state.G[i] = G;
-				start = std::clock();
-				std::pair<CholeskyMap::iterator,bool> ret = 
-					state.choleskyMap.insert ( std::pair<std::string,LowerTriangularMatrix*>(key,G) );
-				// std::pair<CholeskyMap2::iterator,bool> ret = 
-				// 	state.choleskyMap2.insert ( std::pair<std::vector<int>,LowerTriangularMatrix*>(key,G) );
-				if ( not ret.second ) 
-				{
-					//ERROR! Duplicate Cholesky factor was inserted
-				}
-				input.choleskyMapInsert_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-			}
-		}
-	}
-	input.choleskyCheck_time += ( std::clock() - checkStart ) / (double) CLOCKS_PER_SEC;
-}
-
-void advanceInfeasibilityState_multiple(NNLS_Multiple_State& state)
-{
-	for(int i=0;i<state.cols_rhs;++i)
-	{
-		if( state.infeasible[i] < state.lowest_infeasible[i] ) 
-		{
-			state.lowest_infeasible[i] = state.infeasible[i];
-			state.full_exchange_buffer[i] = FULL_EXCHANGE_BUFFER;
-			state.full_exchange_mode[i] = true;
-		}
-		else
-		{
-			--state.full_exchange_buffer[i];
-			if ( state.full_exchange_buffer[i] <= 0 )
-			{
-				state.full_exchange_mode[i] = false;
-			}
-		}
-	}
-}
-
-int nnls_multiple_cpu(NNLS_Multiple_Input& input)
-{//nonnegative least squares of CX - B
-	// int cols = input.CTC->dim;
-	// int cols_rhs = input.cols_rhs;
-	// NNLS_Multiple_State state = NNLS_Multiple_State(cols,cols_rhs);
-	NNLS_Multiple_State& state = *input.state;
-
-	initialize_multiple_cpu(input,state);
-
-	markInfeasible_multiple_cpu(state);
-	determineCholeskyFactors_cpu(input,state);
-
-	while( state.totalfeasible < state.cols_rhs and state.iterations < input.max_iter )
-	{
-		advanceInfeasibilityState_multiple(state);
-
-		for(int i=0;i<state.cols_rhs;++i) if(state.infeasible[i] > 0) switchSets(*state.infeasiblemasks[i],*state.xmasks[i],state.full_exchange_mode[i]);
-		// printf("infeasiblemask: %d,%d,%d,%d\n",state.infeasiblemasks[0]->value[0],state.infeasiblemasks[0]->value[1],
-		// 	state.infeasiblemasks[0]->value[2],state.infeasiblemasks[0]->value[3]);
-		determineCholeskyFactors_cpu(input,state);
-		
-		// printv(state.G[0]->rowmajor,state.G[0]->totalsize);
-		for(int i=0;i<state.cols_rhs;++i) if(state.infeasible[i] > 0) applyVectorMask(input.CTB[i],state.x_masked[i],*state.xmasks[i]);
-		for(int i=0;i<state.cols_rhs;++i) if(state.infeasible[i] > 0) normal_equations_precomputedCholesky_cpu(*state.G[i],state.x_masked[i]);
-		// printf("x_masked, xmask: %f,%f,%f,%f; %d,%d,%d,%d\n",state.x_masked[0][0],state.x_masked[0][1],state.x_masked[0][2],state.x_masked[0][3],
-		// 	state.xmasks[0]->value[0],state.xmasks[0]->value[1],state.xmasks[0]->value[2],state.xmasks[0]->value[3]);
-
-		for(int i=0;i<state.cols_rhs;++i) if(state.infeasible[i] > 0) generateCGTCF(*state.CGTCF[i],*input.CTC,*state.xmasks[i]);
-		for(int i=0;i<state.cols_rhs;++i) if(state.infeasible[i] > 0) matvecmult_colmajor_cpu(*state.CGTCF[i],state.x_masked[i],state.y_masked[i]);
-		for(int i=0;i<state.cols_rhs;++i) if(state.infeasible[i] > 0) generateCGTb(input.CTB[i],state.CGTb[i],*state.xmasks[i]);
-		for(int i=0;i<state.cols_rhs;++i) if(state.infeasible[i] > 0) vectorsub(state.CGTb[i],state.y_masked[i],state.CGTCF[i]->rows);
-		// printf("y_masked: %f,%f,%f,%f\n",state.y_masked[0][0],state.y_masked[0][1],state.y_masked[0][2],state.y_masked[0][3]);
-		
-		markInfeasible_multiple_cpu(state);
-		++state.iterations;
-	}
-
-	for(int i=0;i<state.cols_rhs;++i) overwriteOriginalWithMask(input.X[i],state.x_masked[i],*state.xmasks[i]);
-
-	int totaliterations = state.iterations;
-
-	return totaliterations;
-}
-
-int nnls_multiple_cpu_profile(NNLS_Multiple_Input& input)
-{//nonnegative least squares of CX - B
-	std::clock_t start;
-
-	// start = std::clock();
-	// int cols = input.CTC->dim;
-	// int cols_rhs = input.cols_rhs;
-	// NNLS_Multiple_State state = NNLS_Multiple_State(cols,cols_rhs);
-	// input.allocate_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-	NNLS_Multiple_State& state = *input.state;
-
-	start = std::clock();
-	initialize_multiple_cpu(input,state);
-	input.init_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-
-	start = std::clock();
-	markInfeasible_multiple_cpu(state);
-	input.mark_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-	
-	start = std::clock();
-	determineCholeskyFactors_cpu_profile(input,state);
-	input.determine_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-
-	while( state.totalfeasible < state.cols_rhs and state.iterations < input.max_iter )
-	{
-		start = std::clock();
-		advanceInfeasibilityState_multiple(state);
-		input.advance_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-
-		start = std::clock();
-		for(int i=0;i<state.cols_rhs;++i) if(state.infeasible[i] > 0) switchSets(*state.infeasiblemasks[i],*state.xmasks[i],state.full_exchange_mode[i]);
-		input.switch_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-		// printf("infeasiblemask: %d,%d,%d,%d\n",state.infeasiblemasks[0]->value[0],state.infeasiblemasks[0]->value[1],
-		// 	state.infeasiblemasks[0]->value[2],state.infeasiblemasks[0]->value[3]);
-		start = std::clock();
-		determineCholeskyFactors_cpu_profile(input,state);
-		input.determine_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-		
-		// printv(state.G[0]->rowmajor,state.G[0]->totalsize);
-		start = std::clock();
-		for(int i=0;i<state.cols_rhs;++i) if(state.infeasible[i] > 0) applyVectorMask(input.CTB[i],state.x_masked[i],*state.xmasks[i]);
-		input.apply_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-
-		start = std::clock();
-		for(int i=0;i<state.cols_rhs;++i) if(state.infeasible[i] > 0) normal_equations_precomputedCholesky_cpu(*state.G[i],state.x_masked[i]);
-		input.normal_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-		// printf("x_masked, xmask: %f,%f,%f,%f; %d,%d,%d,%d\n",state.x_masked[0][0],state.x_masked[0][1],state.x_masked[0][2],state.x_masked[0][3],
-		// 	state.xmasks[0]->value[0],state.xmasks[0]->value[1],state.xmasks[0]->value[2],state.xmasks[0]->value[3]);
-
-		start = std::clock();
-		for(int i=0;i<state.cols_rhs;++i) if(state.infeasible[i] > 0) generateCGTCF(*state.CGTCF[i],*input.CTC,*state.xmasks[i]);
-		input.generateCGTCF_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-		
-		start = std::clock();
-		for(int i=0;i<state.cols_rhs;++i) if(state.infeasible[i] > 0) matvecmult_colmajor_cpu(*state.CGTCF[i],state.x_masked[i],state.y_masked[i]);
-		input.matvec_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-		
-		start = std::clock();
-		for(int i=0;i<state.cols_rhs;++i) if(state.infeasible[i] > 0) generateCGTb(input.CTB[i],state.CGTb[i],*state.xmasks[i]);
-		input.generateCGTb_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-
-		start = std::clock();
-		for(int i=0;i<state.cols_rhs;++i) if(state.infeasible[i] > 0) vectorsub(state.CGTb[i],state.y_masked[i],state.CGTCF[i]->rows);
-		input.vector_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-		// printf("y_masked: %f,%f,%f,%f\n",state.y_masked[0][0],state.y_masked[0][1],state.y_masked[0][2],state.y_masked[0][3]);
-
-		start = std::clock();		
-		markInfeasible_multiple_cpu(state);
-		input.mark_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-
-		++state.iterations;
-	}
-
-	start = std::clock();
-	for(int i=0;i<state.cols_rhs;++i) overwriteOriginalWithMask(input.X[i],state.x_masked[i],*state.xmasks[i]);
-	input.overwrite_time += ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
-
-	int totaliterations = state.iterations;
-
-	return totaliterations;
-}
diff --git a/src/parameters.h b/src/parameters.h
deleted file mode 100755
index 4fcb6210..00000000
--- a/src/parameters.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#pragma once
-
-typedef double dtype;
-#define FULL_EXCHANGE_BUFFER 3
diff --git a/src/run_nmf.cpp b/src/run_nmf.cpp
deleted file mode 100755
index a0b972f5..00000000
--- a/src/run_nmf.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-#include <math.h>
-#include <stdio.h>
-#include <iostream>
-#include <string>
-
-#include "parameters.h"
-#include "entities.h"
-#include "vector.h"
-#include "matrix.h"
-#include "factorization.h"
-#include "ls.h"
-#include "mask.h"
-#include "nnls.h"
-
-// NNLS implementation based on source code written by Robert Lee
-// Original code available at https://github.com/rlee32/nmf
-#include <RcppArmadillo.h>
-// [[Rcpp::depends("RcppArmadillo")]]
-
-using namespace Rcpp;
-using namespace arma;
-
-DenseMatrix r_to_cpp(const NumericMatrix& A)
-{
-	int n = A.nrow();
-	int m = A.ncol();
-
-	DenseMatrix X = DenseMatrix(n,m);
-	for(int i = 0; i < n; ++i)
-	{
-		for (int j = 0; j < m; ++j)
-		{
-			X.rowmajor[i][j] = A(i,j);
-		}
-	}
-	X.copyRowToColumn();
-	return X;
-}
-
-LowerTriangularMatrix r_to_cpp_lower_triangular(const NumericMatrix& A)
-{
-	int n = A.nrow();
-	int m = A.ncol();
-	if (n != m)
-	{
-		//uh oh
-	}
-	LowerTriangularMatrix X = LowerTriangularMatrix(n);
-	int ind = 0;
-	for(int i = 0; i < n; ++i)
-	{
-		for (int j = 0; j <= i; ++j)
-		{
-			X.rowmajor[ind] = A(i,j);
-			++ind;
-		}
-	}
-	return X;
-}
-
-void arma_to_cpp(const mat& A, DenseMatrix& X)
-{
-	int n = A.n_rows;
-	int m = A.n_cols;
-
-	for (int i = 0; i < n; ++i)
-	{
-		for (int j = 0; j < m; ++j)
-		{
-			X.rowmajor[i][j] = A(i,j);
-		}
-	}
-	X.copyRowToColumn();
-}
-
-void arma_to_cpp_lower_triangular(const mat& A, LowerTriangularMatrix& X)
-{
-	int n = A.n_rows;
-	int m = A.n_cols;
-	if (n != m)
-	{
-		//oh no
-	}
-	int ind = 0;
-	for (int i = 0; i < n; ++i)
-	{
-		for (int j = 0; j <= i; ++j)
-		{
-			X.rowmajor[ind] = A(i,j);
-			++ind;
-		}
-	}
-}
-
-NumericMatrix cpp_to_r(const DenseMatrix& X)
-{
-	int n = X.rows;
-	int m = X.cols;
-
-	NumericMatrix A = NumericMatrix(n,m);
-	for (int i = 0; i < n; ++i)
-	{
-		for (int j = 0; j < m; ++j)
-		{
-			A(i,j) = X.rowmajor[i][j];
-		}
-	}
-	return A;
-}
-
-void cpp_to_arma(const DenseMatrix& X, mat& A)
-{
-	int n = X.rows;
-	int m = X.cols;
-	A.set_size(n,m);
-	for(int i = 0; i < n; ++i)
-	{
-		for (int j = 0; j < m; ++j)
-		{
-			A(i,j) = X.rowmajor[i][j];
-		}
-	}
-}
-
-// [[Rcpp::export]]
-arma::mat solveNNLS(const arma::mat& C, const arma::mat& B)
-{
-	int max_iter_nnls = 100;
-	LowerTriangularMatrix CTC(C.n_cols);
-	DenseMatrix CTB = DenseMatrix(B.n_cols,C.n_cols);
-	arma_to_cpp_lower_triangular(C.t() * C,CTC);
-	arma_to_cpp(B.t() * C,CTB);
-
-	DenseMatrix X(C.n_cols,B.n_cols);
-	NNLS_Multiple_Input nnls_input = NNLS_Multiple_Input(&CTC,X.colmajor,CTB.rowmajor,B.n_cols,max_iter_nnls);
-	nnls_multiple_cpu(nnls_input);
-	X.copyColumnToRow();
-
-	mat res;
-	cpp_to_arma(X,res);
-	return res;
-}
-
diff --git a/src/vector.h b/src/vector.h
deleted file mode 100755
index 57340a66..00000000
--- a/src/vector.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#pragma once
-
-#include "parameters.h"
-
-dtype vectordot(dtype*a,dtype*b,int size)
-{
-	dtype sum = 0;
-	for(int i=0;i<size;++i) sum += a[i]*b[i];
-	return sum;
-}
-dtype productsum(dtype*a,dtype*b,int size)
-{
-	dtype sum=0;
-	for(int i=0;i<size;++i) sum += a[i]*b[i];
-	return sum;
-}
-void vectoradd(dtype*a,dtype*b,int size,dtype factor=1)
-{
-	for(int i=0;i<size;++i)
-	{
-		b[i] += factor*a[i];
-	}
-}
-void vectorsub(dtype*a,dtype*b,int size)
-{
-	for(int i=0;i<size;++i) b[i] -= a[i];
-}
-void vectornegate(dtype*a,dtype*b,int size)
-{
-	for(int i=0;i<size;++i) b[i] = -a[i];
-}
-void vectorinit(dtype*a,int size,dtype value)
-{
-	for(int i=0;i<size;++i) a[i] = value;
-}