diff --git a/.RData b/.RData new file mode 100644 index 000000000..685a31867 Binary files /dev/null and b/.RData differ diff --git a/.gitignore b/.gitignore index c85245010..d8e811272 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ merfish_preoptic/ .vscode/* .Rprofile inst/python/.ipynb_checkpoints/ +docs diff --git a/DESCRIPTION b/DESCRIPTION index 8fcd0beb4..0142335dd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -109,6 +109,7 @@ Suggests: smfishHmrf, SPARK, SpatialExperiment, + spdep, SummarizedExperiment, tiff, trendsceek, @@ -143,6 +144,7 @@ Collate: 'interactivity.R' 'interoperability.R' 'poly_influence.R' + 'python_bento.R' 'python_environment.R' 'python_hmrf.R' 'python_scrublet.R' @@ -154,6 +156,7 @@ Collate: 'spatial_interaction_visuals.R' 'spatial_structures.R' 'spatial_visuals.R' + 'spdep.R' 'utilities.R' 'utils-pipe.R' 'variable_genes.R' diff --git a/NAMESPACE b/NAMESPACE index fc9c382a2..8fd27481f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,6 +50,8 @@ export(calculateOverlapParallel) export(calculateOverlapPolygonImages) export(calculateOverlapRaster) export(calculateOverlapSerial) +export(calculateSpatCellMetadataProportions) +export(callSpdep) export(cellProximityBarplot) export(cellProximityEnrichment) export(cellProximityEnrichmentEachSpot) @@ -85,6 +87,7 @@ export(comparePolygonExpression) export(convertEnsemblToGeneSymbol) export(convertGiottoLargeImageToMG) export(createArchRProj) +export(createBentoAdata) export(createCellMetaObj) export(createCrossSection) export(createDimObj) @@ -141,6 +144,7 @@ export(dimPlot2D) export(dimPlot3D) export(distGiottoImage) export(doCellSegmentation) +export(doClusterProjection) export(doFeatureSetEnrichment) export(doGiottoClustree) export(doHMRF) @@ -148,6 +152,7 @@ export(doHMRF_V2) export(doHclust) export(doKmeans) export(doLeidenCluster) +export(doLeidenClusterIgraph) export(doLeidenSubCluster) export(doLouvainCluster) export(doLouvainSubCluster) @@ -229,7 +234,8 @@ export(giottoMasterToSuite) export(giottoPoints) export(giottoPolygon) export(giottoToAnnData) -export(giottoToSeurat) +export(giottoToSeuratV4) +export(giottoToSeuratV5) export(giottoToSpatialExperiment) export(heatmSpatialCorFeats) export(heatmSpatialCorGenes) @@ -332,11 +338,14 @@ export(runIntegratedUMAP) export(runPAGEEnrich) export(runPAGEEnrich_OLD) export(runPCA) +export(runPCAprojection) +export(runPCAprojectionBatch) export(runPatternSimulation) export(runRankEnrich) export(runSpatialDeconv) export(runSpatialEnrich) export(runUMAP) +export(runUMAPprojection) export(runWNN) export(runtSNE) export(saveGiotto) @@ -364,8 +373,8 @@ export(set_spatialGrid) export(set_spatialNetwork) export(set_spatial_enrichment) export(set_spatial_locations) -export(seuratToGiotto) -export(seuratToGiotto_OLD) +export(seuratToGiottoV4) +export(seuratToGiottoV5) export(showCellProportionSwitchedPie) export(showCellProportionSwitchedSanKey) export(showClusterDendrogram) @@ -437,6 +446,7 @@ export(spatialAutoCorGlobal) export(spatialAutoCorLocal) export(spatialDE) export(spatialExperimentToGiotto) +export(spdepAutoCorr) export(specificCellCellcommunicationScores) export(stitchFieldCoordinates) export(stitchGiottoLargeImage) diff --git a/NEWS.md b/NEWS.md index 6d2248e57..cd611ee1a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,21 @@ +# Giotto Suite 3.3.2 (Release TBD) + +## Breaking Changes + +## Added +- Added `checkPythonPackage()` within `python_environment.R` to check for a python package within a provided environment, and install that package there if it is not found. 
Supports GitHub URL installation. Supports package version specification, but not version minima, maxima, or ranges (i.e., "pysodb==1.0.0" is supported, "pysodb>=1.0.0" is not)
+- Added `py_install_prompt()` within `python_environment.R` to prompt a user to install a python package within a provided environment
+- Added `install_github_link_pip()` within `python_environment.R` to use pip to install a python package from a GitHub link within a provided environment after prompting the user.
+- Added `install_py_pkg_reticulate()` within `python_environment.R` to install a specified python package within a provided environment via `reticulate::py_install()` after prompting the user.
+- New file `python_bento.R`
+- Added `createBentoAdata()` within `python_bento.R` to create a bento-flavored anndata object from a Giotto object
+
+## Changes
+- Improved performance of `gefToGiotto()`
+- Added `env_name` as an argument to `anndataToGiotto()` and `giottoToAnnData()` for compatibility with `checkPythonPackage()`
+- Deprecated internal function `check_py_for_scanpy()` in favor of new function `checkPythonPackage()`
+

 # Giotto Suite 3.3.1 (2023-08-02)
@@ -28,10 +45,10 @@
 ## Changes
 - Fix bug in `combine_matrices()`
 - Fix bug in `createGiottoObject()` that will not allow object creation without supplied expression information
-- Updated `polyStamp()` to replace an apply function with a crossjoin for better performance.
-- Updated `spatInSituPlotPoints()` with `plot_last` parameter. Default output now plots polygons above points for better visibility.
+- Updated `polyStamp()` to replace an apply function with a crossjoin for better performance
+- Updated `spatInSituPlotPoints()` with `plot_last` parameter. Default output now plots polygons above points for better visibility
 - Add check for spatLocsObj for spatlocs in polyStamp()
-- Removed various print() and cat() statements throughout.
+- Removed various print() and cat() statements throughout
 - Changed default verbose argument to FALSE for createGiottoObject
 - Changed default verbose argument to FALSE for joinGiottoObjects
 - Changed default verbose argument to FALSE for createGiottoObjectSubcellular
@@ -154,7 +171,7 @@
 - Fix `loadGiotto()` loss of over-allocation for data.tables-based objects after loading from disk

-# Giotto Suite 3.1.0 (2202-12-01)
+# Giotto Suite 3.1.0 (2022-12-01)

 ## Added
@@ -209,7 +226,7 @@

-# Giotto Suite 2.1.0 (2202-11-09)
+# Giotto Suite 2.1.0 (2022-11-09)

 ## Breaking Changes
 - Update of python version to **3.10.2** [details](https://giottosuite.readthedocs.io/en/latest/additionalinformation.html#giotto-suite-2-1-0-2202-11-09)
diff --git a/R/auxiliary_giotto.R b/R/auxiliary_giotto.R
index 2dc49cc69..0e804d441 100644
--- a/R/auxiliary_giotto.R
+++ b/R/auxiliary_giotto.R
@@ -4774,6 +4774,146 @@ calculateMetaTableCells = function(gobject,
+#' @title calculateSpatCellMetadataProportions
+#' @name calculateSpatCellMetadataProportions
+#' @description calculates a proportion table for a cell metadata column (e.g. cluster labels)
+#' for all the spatial neighbors of a source cell. In other words, it calculates the
+#' niche composition for a given annotation for each cell.
+#' @param gobject giotto object
+#' @param spat_unit spatial unit
+#' @param feat_type feature type
+#' @param spat_network spatial network
+#' @param metadata_column metadata column to use
+#' @param name descriptive name for the calculated proportions
+#' @param return_gobject return giotto object
+#' @return giotto object (default) or enrichment object if return_gobject = FALSE
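+#' @examples
+#' \dontrun{
+#' # minimal usage sketch (hypothetical names): assumes a Giotto object `g`
+#' # with an existing spatial network named 'Delaunay_network' and a cell
+#' # metadata column 'leiden_clus' that holds cluster labels
+#' g = calculateSpatCellMetadataProportions(gobject = g,
+#'                                          spat_network = 'Delaunay_network',
+#'                                          metadata_column = 'leiden_clus',
+#'                                          name = 'niche_proportions')
+#' }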
+#' @export
+calculateSpatCellMetadataProportions = function(gobject,
+                                                spat_unit = NULL,
+                                                feat_type = NULL,
+                                                spat_network = NULL,
+                                                metadata_column = NULL,
+                                                name = 'proportion',
+                                                return_gobject = TRUE){
+
+
+  if(is.null(spat_network)) stop('spat_network = NULL, you need to provide an existing spatial network')
+  if(is.null(metadata_column)) stop('metadata_column = NULL, you need to provide an existing cell metadata column')
+
+  # Set feat_type and spat_unit
+  spat_unit = set_default_spat_unit(gobject = gobject,
+                                    spat_unit = spat_unit)
+  feat_type = set_default_feat_type(gobject = gobject,
+                                    spat_unit = spat_unit,
+                                    feat_type = feat_type)
+
+  # get spatial network to use
+  sp_network = get_spatialNetwork(gobject = gobject,
+                                  spat_unit = spat_unit,
+                                  name = spat_network,
+                                  output = 'networkDT')
+
+  # convert spatial network to a full spatial network
+  sp_network = convert_to_full_spatial_network(reduced_spatial_network_DT = sp_network)
+
+  # get cell metadata
+  cell_meta = get_cell_metadata(gobject = gobject,
+                                spat_unit = spat_unit,
+                                feat_type = feat_type,
+                                output = 'data.table')
+
+  # merge spatial network and cell metadata
+  network_annot = data.table::merge.data.table(sp_network, cell_meta[,c('cell_ID', metadata_column), with = FALSE], by.x = 'source', by.y = 'cell_ID')
+  setnames(network_annot, old = metadata_column, 'source_clus')
+  network_annot = data.table::merge.data.table(network_annot, cell_meta[,c('cell_ID', metadata_column), with = FALSE], by.x = 'target', by.y = 'cell_ID')
+  setnames(network_annot, old = metadata_column, 'target_clus')
+
+  # create self information: source cell is its own neighbor
+  source_annot_info = unique(network_annot[,.(source, source_clus)])
+  setnames(source_annot_info, 'source_clus', 'label')
+  source_annot_info[, target := source]
+  source_annot_info = source_annot_info[,.(source, target, label)]
+
+  # network information: source cells and other neighbors
+  target_annot_info = unique(network_annot[,.(source, target, target_clus)])
+  setnames(target_annot_info, 'target_clus', 'label')
+
+  # combine: provides most detailed information about neighbors
+  final_annot_info = rbindlist(list(source_annot_info, target_annot_info))
+
+
+
+  # calculate proportions of neighbors
+  tableres = final_annot_info[, names(table(label)), by = 'source']
+  setnames(tableres, 'V1', 'tablelabels')
+  propensities = final_annot_info[, prop.table(table(label)), by = 'source']
+  setnames(propensities, 'V1', 'proptable')
+
+
+  # data.table variables
+  label = NULL
+  propensities[, label := tableres$tablelabels]
+  propensities[, proptable := as.numeric(proptable)]
+  proportions_mat = dcast.data.table(propensities, formula = 'source~label', fill = 0, value.var = 'proptable')
+  data.table::setnames(x = proportions_mat, old = 'source', new = 'cell_ID')
+
+  # convert to matrix
+  # proportions_matrix = dt_to_matrix(proportions_mat)
+  # proportions_matrix[1:4, 1:10]
+
+  # create spatial enrichment object
+  enrObj = create_spat_enr_obj(name = name,
+                               method = 'rank',
+                               enrichDT = proportions_mat,
+                               spat_unit = spat_unit,
+                               feat_type = feat_type,
+                               provenance = NULL,
+                               misc = NULL)
+
+
+  ## return object or results ##
+  if(return_gobject == TRUE) {
+
+    spenr_names = list_spatial_enrichments_names(gobject = gobject,
+                                                 spat_unit = spat_unit,
+                                                 feat_type = feat_type)
+
+
+    if(name %in% spenr_names) {
+      cat('\n ', name, ' has already been used, will be overwritten \n')
+    }
+
+    ## update parameters used ##
+    gobject = update_giotto_params(gobject, description = '_enrichment')
+
+    ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
+    gobject = set_spatial_enrichment(gobject = gobject,
+                                     spatenrichment = enrObj)
+    ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
+
+    return(gobject)
+
+  } else {
+
+    return(enrObj)
+
+  }
+
+
+
+}
+
+
+
+
+
 #' @title combineMetadata
 #' @name combineMetadata
 #' @description This function combines the cell metadata with spatial locations and
diff --git a/R/clustering.R b/R/clustering.R
index d9e8a93c0..74f69584e 100644
--- a/R/clustering.R
+++ b/R/clustering.R
@@ -184,11 +184,156 @@ doLeidenCluster = function(gobject,
 }
+
+
+
+
+#' @title doLeidenClusterIgraph
+#' @name doLeidenClusterIgraph
+#' @description cluster cells using a NN-network and the Leiden community
+#' detection algorithm as implemented in igraph
+#' @param gobject giotto object
+#' @param spat_unit spatial unit (e.g. "cell")
+#' @param feat_type feature type (e.g. "rna", "dna", "protein")
+#' @param name name for cluster, defaults to "leiden_clus"
+#' @param nn_network_to_use type of NN network to use (kNN vs sNN), defaults to "sNN"
+#' @param network_name name of NN network to use, defaults to "sNN.pca"
+#' @param objective_function objective function for the Leiden algorithm
+#' @param weights weights of edges
+#' @param resolution_parameter resolution, default = 1
+#' @param beta Leiden randomness
+#' @param initial_membership initial membership of cells for the partition
+#' @param n_iterations number of iterations to run the Leiden algorithm
+#' @param return_gobject boolean: return giotto object (default = TRUE)
+#' @param set_seed set seed
+#' @param seed_number number for seed
+#' @param ... additional parameters passed to \code{\link[igraph]{cluster_leiden}}
+#' @return giotto object with new clusters appended to cell metadata
+#' @details
+#' This function is a wrapper for the Leiden algorithm implemented in igraph,
+#' which can detect communities in graphs of millions of nodes (cells),
+#' as long as they can fit in memory. See \code{\link[igraph]{cluster_leiden}} for more information.
+#'
+#' Set \emph{weights = NULL} to use the edge weights associated with the igraph network.
+#' Set \emph{weights = NA} if you don't want to use edge weights
+#'
+#' @export
+doLeidenClusterIgraph = function(gobject,
+                                 spat_unit = NULL,
+                                 feat_type = NULL,
+                                 name = 'leiden_clus',
+                                 nn_network_to_use = 'sNN',
+                                 network_name = 'sNN.pca',
+                                 objective_function = c("modularity", "CPM"),
+                                 weights = NULL,
+                                 resolution_parameter = 1,
+                                 beta = 0.01,
+                                 initial_membership = NULL,
+                                 n_iterations = 1000,
+                                 return_gobject = TRUE,
+                                 set_seed = TRUE,
+                                 seed_number = 1234,
+                                 ...)
{ + + + # Set feat_type and spat_unit + spat_unit = set_default_spat_unit(gobject = gobject, + spat_unit = spat_unit) + feat_type = set_default_feat_type(gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type) + + ## get cell IDs ## + cell_ID_vec = gobject@cell_ID[[spat_unit]] + + ## select network to use + igraph_object = get_NearestNetwork(gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + nn_network_to_use = nn_network_to_use, + network_name = network_name, + output = 'igraph') + + ## select partition type + objective_function = match.arg(objective_function, + choices = c("modularity", "CPM")) + + ## set seed + if(isTRUE(set_seed)) { + seed_number = as.integer(seed_number) + } else { + seed_number = as.integer(sample(x = 1:10000, size = 1)) + } + + # make igraph network undirected + graph_object_undirected = igraph::as.undirected(igraph_object) + set.seed(seed_number) + leiden_clusters = igraph::cluster_leiden(graph = graph_object_undirected, + objective_function = objective_function, + resolution_parameter = resolution_parameter, + beta = beta, + weights = weights, + initial_membership = initial_membership, + n_iterations = n_iterations, + ...) + + # summarize results + ident_clusters_DT = data.table::data.table('cell_ID' = leiden_clusters$names, 'name' = leiden_clusters$membership) + data.table::setnames(ident_clusters_DT, 'name', name) + + + + ## add clusters to metadata ## + if(return_gobject == TRUE) { + + + cluster_names = names(pDataDT(gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type)) + #cluster_names = names(gobject@cell_metadata[[spat_unit]][[feat_type]]) + + if(name %in% cluster_names) { + cat('\n ', name, ' has already been used, will be overwritten \n') + cell_metadata = get_cell_metadata(gobject, + spat_unit = spat_unit, + feat_type = feat_type, + output = 'cellMetaObj', + copy_obj = TRUE) + + cell_metadata[][, eval(name) := NULL] + + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + gobject = set_cell_metadata(gobject, + metadata = cell_metadata, + verbose = FALSE) + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + } + + gobject = addCellMetadata(gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + new_metadata = ident_clusters_DT[, c('cell_ID', name), with = FALSE], + by_column = TRUE, column_cell_ID = 'cell_ID') + + ## update parameters used ## + gobject = update_giotto_params(gobject, description = '_cluster') + return(gobject) + + + } else { + + # else return clustering result + return(ident_clusters_DT) + } + + +} + + + #' @title doGiottoClustree #' @name doGiottoClustree #' @description cluster cells using leiden methodology to visualize different resolutions #' @param gobject giotto object -#' @param res_vector vector of different resolutions to test +#' @param res_vector vector of different resolutions to test #' @param res_seq list of float numbers indicating start, end, and step size for resolution testing, i.e. (0.1, 0.6, 0.1) #' @param return_gobject default FALSE. See details for more info. #' @param show_plot by default, pulls from provided gobject instructions @@ -198,15 +343,15 @@ doLeidenCluster = function(gobject, #' @param default_save_name name of saved plot, defaut "clustree" #' @return a plot object (default), OR a giotto object (if specified) #' @details This function tests different resolutions for Leiden clustering and provides a visualization -#' of cluster sizing as resolution varies. 
-#'
+#' of cluster sizing as resolution varies.
+#'
 #' By default, the tested leiden clusters are NOT saved to the Giotto object, and a plot is returned.
-#'
-#' If return_gobject is set to TRUE, and a giotto object with *all* tested leiden cluster information
-#' will be returned.
+#'
+#' If return_gobject is set to TRUE, a giotto object with *all* tested leiden cluster information
+#' will be returned.
 #' @seealso \code{\link{doLeidenCluster}}
 #' @export
-doGiottoClustree <- function(gobject,
+doGiottoClustree <- function(gobject,
                              res_vector = NULL,
                              res_seq = NULL,
                              return_gobject = FALSE,
@@ -223,28 +368,28 @@ doGiottoClustree <- function(gobject,
       res_vector = seq(res_seq[1], res_seq[2], res_seq[3])
     } else stop("Please input res_vector or res_seq parameters")
   }
-
+
   ## performing multiple leiden clusters at resolutions specified
   for (i in res_vector){
     gobject = doLeidenCluster(gobject = gobject, resolution = i, name = paste0("leiden_clustree_", print(i), ...))
   }
-
+
   ## plotting clustree graph
   pl = clustree::clustree(pDataDT(gobject), prefix = "leiden_clustree_", ...)
   show_plot = ifelse(is.na(show_plot), readGiottoInstructions(gobject, param = 'show_plot'), show_plot)
   save_plot = ifelse(is.na(save_plot), readGiottoInstructions(gobject, param = 'save_plot'), save_plot)
   return_plot = ifelse(is.na(return_plot), readGiottoInstructions(gobject, param = 'return_plot'), return_plot)
-
+
   ## add
   show_plot = ifelse(is.na(show_plot), readGiottoInstructions(gobject, param = "show_plot"), show_plot)
   save_plot = ifelse(is.na(save_plot), readGiottoInstructions(gobject, param = "save_plot"), save_plot)
   return_plot = ifelse(is.na(return_plot), readGiottoInstructions(gobject, param = "return_plot"), return_plot)
-
+
   ## print plot
   if(show_plot == TRUE) {
     print(pl)
   }
-
+
   ## save plot
   if(save_plot == TRUE) {
     do.call('all_plots_save_function', c(list(gobject = gobject, plot_object = pl, default_save_name = default_save_name), save_param))
@@ -254,7 +399,7 @@ doGiottoClustree <- function(gobject,
   if(return_gobject == TRUE){
     return(gobject)
   }
-
+
   ## return plot
   if(return_plot == TRUE) {
     return(pl)
@@ -2738,3 +2883,158 @@ getDendrogramSplits = function(gobject,
+
+
+
+#' @title Projection of cluster labels
+#' @name doClusterProjection
+#' @description Use a fast KNN classifier to predict labels from a smaller giotto object
+#' @param target_gobject target giotto object
+#' @param target_cluster_label_name name for predicted clusters
+#' @param spat_unit spatial unit
+#' @param feat_type feature type
+#' @param source_gobject source giotto object with annotation data
+#' @param source_cluster_labels annotation/labels to use to train KNN classifier
+#' @param reduction reduction on cells or features (default = cells)
+#' @param reduction_method shared reduction method (default = pca space)
+#' @param reduction_name name of shared reduction space (default name = 'pca')
+#' @param dimensions_to_use dimensions to use in shared reduction space (default = 1:10)
+#' @param knn_k number of k-neighbors to train a KNN classifier
+#' @param prob output probabilities together with label predictions
+#' @param algorithm nearest neighbor search algorithm
+#' @param return_gobject return giotto object
+#' @return giotto object (default) or data.table with cell metadata
+#'
+#' @details Function to train a KNN with \code{\link[FNN]{knn}}. The training data
+#' is obtained from the source giotto object (source_gobject) using existing annotations
+#' within the cell metadata. Cells without annotation/labels from the target giotto
+#' object (target_gobject) will receive predicted labels (and optional probabilities
+#' with prob = TRUE).
+#'
+#' **IMPORTANT** This projection assumes that you're using the same dimension reduction
+#' space (e.g. PCA) and number of dimensions (e.g. first 10 PCs) to train the KNN
+#' classifier as you used to create the initial annotations/labels in the source
+#' Giotto object.
+#'
+#' Altogether this is a convenience function that allows you to work with very big
+#' data as you can predict cell labels on a smaller & subsetted Giotto object and then
+#' project the cell labels to the remaining cells in the target Giotto object.
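+#' @examples
+#' \dontrun{
+#' # minimal sketch (hypothetical object names): `small_g` is a subsetted,
+#' # annotated source object and `big_g` the full target object; both share
+#' # the same 'pca' reduction space and dimensions
+#' big_g = doClusterProjection(target_gobject = big_g,
+#'                             source_gobject = small_g,
+#'                             source_cluster_labels = 'leiden_clus',
+#'                             dimensions_to_use = 1:10,
+#'                             knn_k = 10)
+#' }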
+#' @export
+doClusterProjection = function(target_gobject,
+                               target_cluster_label_name = 'knn_labels',
+                               spat_unit = NULL,
+                               feat_type = NULL,
+                               source_gobject,
+                               source_cluster_labels = NULL,
+                               reduction = 'cells',
+                               reduction_method = 'pca',
+                               reduction_name = 'pca',
+                               dimensions_to_use = 1:10,
+                               knn_k = 10,
+                               prob = FALSE,
+                               algorithm = c("kd_tree",
+                                             "cover_tree", "brute"),
+                               return_gobject = TRUE) {
+
+
+  # package check for FNN
+  package_check(pkg_name = "FNN", repository = "CRAN")
+
+  # identify clusters from source object and create annotation vector
+  cell_meta_source = get_cell_metadata(gobject = source_gobject,
+                                       spat_unit = spat_unit,
+                                       feat_type = feat_type,
+                                       output = 'data.table')
+  source_annot_vec = cell_meta_source[[source_cluster_labels]]
+  names(source_annot_vec) = cell_meta_source[['cell_ID']]
+
+  # create the matrix from the target object that you want to use for the kNN classifier
+  # the matrix should be the same for the source and target objects (e.g. same PCA space)
+  dim_obj = get_dimReduction(gobject = target_gobject,
+                             spat_unit = spat_unit,
+                             feat_type = feat_type,
+                             reduction = reduction,
+                             reduction_method = reduction_method,
+                             name = reduction_name,
+                             output = 'dimObj')
+
+  dim_coord = dim_obj[]
+  dimensions_to_use = dimensions_to_use[dimensions_to_use %in% 1:ncol(dim_coord)]
+  matrix_to_use = dim_coord[, dimensions_to_use]
+
+  ## create the training and testset from the matrix
+
+  # the training set is the set of cell IDs that are in both the source (w/ labels)
+  # and target giotto object
+  train = matrix_to_use[rownames(matrix_to_use) %in% names(source_annot_vec),]
+  train = train[match(names(source_annot_vec), rownames(train)), ]
+
+  # the test set are the remaining cell_IDs that need a label
+  test = matrix_to_use[!rownames(matrix_to_use) %in% names(source_annot_vec),]
+  cl = source_annot_vec
+
+  # make prediction
+  knnprediction = FNN::knn(train = train, test = test,
+                           cl = cl, k = knn_k, prob = prob,
+                           algorithm = algorithm)
+
+  # get prediction results
+  knnprediction_vec = as.vector(knnprediction)
+  names(knnprediction_vec) = rownames(test)
+
+  # add probability information
+  if(isTRUE(prob)) {
+    probs = attr(knnprediction, "prob")
+    names(probs) = rownames(test)
+  }
+
+
+
+  # create annotation vector for all cell IDs (from source and predicted)
+  all_vec = c(source_annot_vec, knnprediction_vec)
+
+  cell_meta_target = get_cell_metadata(gobject = target_gobject,
+                                       spat_unit = spat_unit,
+                                       feat_type = feat_type,
+                                       output = 'data.table')
+  # data.table variables
+  temp_name = temp_name_prob = NULL
+  cell_meta_target[, temp_name := all_vec[cell_ID]]
+
+  if(isTRUE(prob)) {
+
+    cell_meta_target[, temp_name_prob := probs[cell_ID]]
+    cell_meta_target = cell_meta_target[,.(cell_ID, temp_name, temp_name_prob)]
+    cell_meta_target[, temp_name_prob := ifelse(is.na(temp_name_prob), 1,
+                                                temp_name_prob)]
+
+    data.table::setnames(cell_meta_target,
+                         old = c('temp_name', 'temp_name_prob'),
+                         new = c(target_cluster_label_name, paste0(target_cluster_label_name,'_prob')))
+  } else {
+
+    cell_meta_target = cell_meta_target[,.(cell_ID, temp_name)]
+    data.table::setnames(cell_meta_target,
+                         old = 'temp_name',
+                         new = target_cluster_label_name)
+  }
+
+
+  if(return_gobject) {
+
+    if(isTRUE(prob)) {
+
+      prob_label = paste0(target_cluster_label_name,'_prob')
+
+      target_gobject = addCellMetadata(gobject = target_gobject,
+                                       new_metadata = cell_meta_target[, c('cell_ID', target_cluster_label_name, prob_label), with = FALSE],
+                                       by_column = TRUE,
+                                       column_cell_ID = 'cell_ID')
+    } else {
+      target_gobject = addCellMetadata(gobject = target_gobject,
+                                       new_metadata = cell_meta_target[, c('cell_ID', target_cluster_label_name), with = FALSE],
+                                       by_column = TRUE,
+                                       column_cell_ID = 'cell_ID')
+    }
+
+    return(target_gobject)
+
+  } else {
+    return(cell_meta_target)
+  }
+
+}
diff --git a/R/dimension_reduction.R b/R/dimension_reduction.R
index 981dfd59e..810eead4e 100644
--- a/R/dimension_reduction.R
+++ b/R/dimension_reduction.R
@@ -241,7 +241,7 @@ runPCA_factominer = function(x,
 #' @param set_seed use of seed
 #' @param seed_number seed number to use
 #' @param BSPARAM method to use
-#' @param BSParameters additonal parameters for method
+#' @param BPPARAM BiocParallelParam object
 #' @keywords internal
 #' @return list of eigenvalues, loadings and pca coordinates
 runPCA_BiocSingular = function(x,
@@ -252,7 +252,7 @@
                                set_seed = TRUE,
                                seed_number = 1234,
                                BSPARAM = c('irlba', 'exact', 'random'),
-                               BSParameters = list(NA),
+                               BPPARAM = BiocParallel::SerialParam(),
                                ...) {
   BSPARAM = match.arg(BSPARAM, choices = c('irlba', 'exact', 'random'))
@@ -276,17 +276,20 @@
     if(BSPARAM == 'irlba') {
       pca_res = BiocSingular::runPCA(x = x, rank = ncp, center = center, scale = scale,
-                                     BSPARAM = BiocSingular::IrlbaParam(BSParameters),
+                                     BSPARAM = BiocSingular::IrlbaParam(),
+                                     BPPARAM = BPPARAM,
                                      ...)
     } else if(BSPARAM == 'exact') {
       pca_res = BiocSingular::runPCA(x = x, rank = ncp, center = center, scale = scale,
-                                     BSPARAM = BiocSingular::ExactParam(BSParameters),
+                                     BSPARAM = BiocSingular::ExactParam(),
+                                     BPPARAM = BPPARAM,
                                      ...)
     } else if(BSPARAM == 'random') {
       pca_res = BiocSingular::runPCA(x = x, rank = ncp, center = center, scale = scale,
-                                     BSPARAM = BiocSingular::RandomParam(BSParameters),
+                                     BSPARAM = BiocSingular::RandomParam(),
+                                     BPPARAM = BPPARAM,
                                      ...)
     }
@@ -309,17 +312,20 @@
     if(BSPARAM == 'irlba') {
       pca_res = BiocSingular::runPCA(x = x, rank = ncp, center = center, scale = scale,
-                                     BSPARAM = BiocSingular::IrlbaParam(BSParameters),
+                                     BSPARAM = BiocSingular::IrlbaParam(),
+                                     BPPARAM = BPPARAM,
                                      ...)
     } else if(BSPARAM == 'exact') {
       pca_res = BiocSingular::runPCA(x = x, rank = ncp, center = center, scale = scale,
-                                     BSPARAM = BiocSingular::ExactParam(BSParameters),
+                                     BSPARAM = BiocSingular::ExactParam(),
+                                     BPPARAM = BPPARAM,
                                      ...)
     } else if(BSPARAM == 'random') {
       pca_res = BiocSingular::runPCA(x = x, rank = ncp, center = center, scale = scale,
-                                     BSPARAM = BiocSingular::RandomParam(BSParameters),
+                                     BSPARAM = BiocSingular::RandomParam(),
+                                     BPPARAM = BPPARAM,
+                                     ...)
} @@ -423,7 +429,7 @@ create_feats_to_use_matrix = function(gobject, #' @param scale_unit scale features before PCA (default = TRUE) #' @param ncp number of principal components to calculate #' @param method which implementation to use -#' @param method_params additional parameters +#' @param method_params BiocParallelParam object #' @param rev do a reverse PCA #' @param set_seed use of seed #' @param seed_number seed number to use @@ -451,7 +457,7 @@ runPCA <- function(gobject, scale_unit = TRUE, ncp = 100, method = c('irlba', 'exact', 'random','factominer'), - method_params = list(NA), + method_params = BiocParallel::SerialParam(), rev = FALSE, set_seed = TRUE, seed_number = 1234, @@ -488,31 +494,31 @@ runPCA <- function(gobject, spat_unit = spat_unit, values = values, output = 'exprObj') - + provenance = prov(expr_values) - + if(!is.null(slot(gobject, 'h5_file'))) { expr_path = slot(expr_values, 'exprMat') - + expr_values = HDF5Array::h5mread(filepath = slot(gobject, 'h5_file'), name = paste0('expression/', feat_type,'/', values)) - + expr_dimnames = HDF5Array::h5readDimnames(filepath = slot(gobject, 'h5_file'), name = paste0('expression/', feat_type,'/', values)) - + rownames(expr_values) = expr_dimnames[[1]] colnames(expr_values) = expr_dimnames[[2]] - + } else { expr_values = expr_values[] # extract matrix } - - - + + + ## subset matrix if(!is.null(feats_to_use)) { @@ -524,7 +530,7 @@ runPCA <- function(gobject, verbose = verbose) } - + # do PCA dimension reduction reduction = match.arg(reduction, c('cells', 'feats')) @@ -542,7 +548,7 @@ runPCA <- function(gobject, set_seed = set_seed, seed_number = seed_number, BSPARAM = method, - BSParameters = method_params, + BPPARAM = method_params, ...) } else if(method == 'factominer') { pca_object = runPCA_factominer(x = t_flex(expr_values), @@ -554,34 +560,741 @@ runPCA <- function(gobject, } else { stop('only PCA methods from the BiocSingular and factominer package have been implemented \n') } - - - } else { - # PCA on genes - if(method %in% c('irlba', 'exact', 'random')) { - pca_object = runPCA_BiocSingular(x = expr_values, - center = center, - scale = scale_unit, - ncp = ncp, - rev = rev, - set_seed = set_seed, - seed_number = seed_number, - BSPARAM = method, - BSParameters = method_params, - ...) - } else if(method == 'factominer') { - pca_object = runPCA_factominer(x = expr_values, - scale = scale_unit, ncp = ncp, rev = rev, - set_seed = set_seed, seed_number = seed_number, ...) - } else { - stop('only PCA methods from the irlba and factominer package have been implemented \n') + + } else { + # PCA on genes + if(method %in% c('irlba', 'exact', 'random')) { + pca_object = runPCA_BiocSingular(x = expr_values, + center = center, + scale = scale_unit, + ncp = ncp, + rev = rev, + set_seed = set_seed, + seed_number = seed_number, + BSPARAM = method, + BPPARAM = method_params, + ...) + + } else if(method == 'factominer') { + pca_object = runPCA_factominer(x = expr_values, + scale = scale_unit, ncp = ncp, rev = rev, + set_seed = set_seed, seed_number = seed_number, ...) 
+    } else {
+      stop('only PCA methods from the irlba and factominer package have been implemented \n')
+    }
+
+  }
+
+
+  if(return_gobject == TRUE) {
+
+    pca_names = list_dim_reductions_names(gobject = gobject,
+                                          data_type = reduction,
+                                          spat_unit = spat_unit,
+                                          feat_type = feat_type,
+                                          dim_type = 'pca')
+
+    if(name %in% pca_names) {
+      cat('\n ', name, ' has already been used, will be overwritten \n')
+    }
+
+    if (reduction == "cells") {
+      my_row_names = colnames(expr_values)
+    } else {
+      my_row_names = rownames(expr_values)
+    }
+
+    dimObject = create_dim_obj(name = name,
+                               feat_type = feat_type,
+                               spat_unit = spat_unit,
+                               provenance = provenance,
+                               reduction = reduction,
+                               reduction_method = 'pca',
+                               coordinates = pca_object$coords,
+                               misc = list(eigenvalues = pca_object$eigenvalues,
+                                           loadings = pca_object$loadings),
+                               my_rownames = my_row_names)
+
+    ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
+    gobject = set_dimReduction(gobject = gobject, dimObject = dimObject)
+    ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
+
+
+    ## update parameters used ##
+    gobject = update_giotto_params(gobject, description = '_pca')
+    return(gobject)
+
+
+  } else {
+    return(pca_object)
+  }
+}
+
+
+## * PCA projections ####
+# ---------------------- #
+
+
+#' @title runPCA_BiocSingular_irlba_projection
+#' @name runPCA_BiocSingular_irlba_projection
+#' @description Performs PCA based on the BiocSingular package on a
+#' subset of the matrix. It uses the obtained loadings to predict coordinates
+#' for the remaining matrix.
+#' @param x matrix or object that can be converted to matrix
+#' @param ncp number of principal components to calculate
+#' @param center center the matrix before pca
+#' @param scale scale features
+#' @param rev reverse PCA
+#' @param set_seed use of seed
+#' @param seed_number seed number to use
+#' @param BPPARAM BiocParallelParam object
+#' @param random_subset random subset to perform PCA on
+#' @param verbose verbosity level
+#' @keywords internal
+#' @return list of eigenvalues, loadings and pca coordinates
+runPCA_BiocSingular_irlba_projection = function(x,
+                                                ncp = 100,
+                                                center = TRUE,
+                                                scale = TRUE,
+                                                rev = FALSE,
+                                                set_seed = TRUE,
+                                                seed_number = 1234,
+                                                BPPARAM = BiocParallel::SerialParam(),
+                                                random_subset = 500,
+                                                verbose = TRUE,
+                                                ...) {
+
+
+  x = scale(x, center = center, scale = scale)
+
+  min_ncp = min(dim(x))
+
+  if(ncp >= min_ncp) {
+    warning("ncp >= minimum dimension of x, will be set to minimum dimension of x - 1")
+    ncp = min_ncp-1
+  }
+
+  # start seed
+  if(isTRUE(set_seed)) {
+    set.seed(seed = seed_number)
+  }
+
+
+
+
+  if(isTRUE(rev)) {
+
+    x = t_flex(x)
+
+    # store cell ID order information
+    cell_ID_order = rownames(x)
+
+    # create random selection
+    random_selection = sort(sample(1:nrow(x), random_subset))
+    subsample_matrix = x[random_selection, ]
+
+
+    if(verbose) wrap_msg('pca random subset: start')
+
+    # pca on random selection
+    pca_res = BiocSingular::runPCA(x = subsample_matrix,
+                                   rank = ncp,
+                                   center = F, scale = F,
+                                   BSPARAM = BiocSingular::IrlbaParam(),
+                                   BPPARAM = BPPARAM,
+                                   ...)
+
+    if(verbose) wrap_msg('pca random subset: done')
+    if(verbose) wrap_msg('pca prediction: start')
+
+    # create leftover matrix
+    leftover_matrix = x[-random_selection, ]
+
+    # predict on leftover matrix
+    class(pca_res) = 'prcomp'
+    pca_res$center = FALSE
+    pca_res$scale = FALSE
+    project_results = predict(pca_res, leftover_matrix)
+
+    if(verbose) wrap_msg('pca prediction: done')
+
+    # combine subsample + predicted coordinates
+    coords = rbind(pca_res$x, project_results)
+    coords = coords[match(cell_ID_order, rownames(coords)), ]
+
+    # eigenvalues
+    eigenvalues = pca_res$sdev^2
+
+    # PC loading
+    loadings = coords
+    rownames(loadings) = rownames(x)
+    colnames(loadings) = paste0('Dim.', 1:ncol(loadings))
+
+    # coordinates
+    coords = pca_res$rotation
+    rownames(coords) = colnames(x)
+    colnames(coords) = paste0('Dim.', 1:ncol(coords))
+
+    result = list(eigenvalues = eigenvalues, loadings = loadings, coords = coords)
+
+
+  } else {
+
+    # store cell ID order information
+    cell_ID_order = rownames(x)
+
+    # create random selection
+    random_selection = sort(sample(1:nrow(x), random_subset))
+    subsample_matrix = x[random_selection, ]
+
+    if(verbose) wrap_msg('pca random subset: start')
+
+    pca_res = BiocSingular::runPCA(x = subsample_matrix,
+                                   rank = ncp,
+                                   center = F, scale = F,
+                                   BSPARAM = BiocSingular::IrlbaParam(),
+                                   BPPARAM = BPPARAM,
+                                   ...)
+
+    if(verbose) wrap_msg('pca random subset: done')
+    if(verbose) wrap_msg('pca prediction: start')
+
+    # create leftover matrix
+    leftover_matrix = x[-random_selection, ]
+
+    # predict on leftover matrix
+    class(pca_res) = 'prcomp'
+    pca_res$center = FALSE
+    pca_res$scale = FALSE
+    project_results = predict(pca_res, leftover_matrix)
+
+    if(verbose) wrap_msg('pca prediction: done')
+
+    # combine subsample + predicted coordinates
+    coords = rbind(pca_res$x, project_results)
+    coords = coords[match(cell_ID_order, rownames(coords)), ]
+
+    # eigenvalues
+    eigenvalues = pca_res$sdev^2
+
+    # PC loading
+    loadings = pca_res$rotation
+    rownames(loadings) = colnames(x)
+    colnames(loadings) = paste0('Dim.', 1:ncol(loadings))
+
+    # coordinates
+    colnames(coords) = paste0('Dim.', 1:ncol(coords))
+
+    result = list(eigenvalues = eigenvalues, loadings = loadings, coords = coords)
+
+  }
+
+
+  # exit seed
+  if(isTRUE(set_seed)) {
+    set.seed(seed = Sys.time())
+  }
+
+  return(result)
+
+}
+
+
+
+
+
+
+#' @title runPCAprojection
+#' @name runPCAprojection
+#' @description runs a Principal Component Analysis on a random subset + projection
+#' @param gobject giotto object
+#' @param spat_unit spatial unit
+#' @param feat_type feature type
+#' @param expression_values expression values to use
+#' @param reduction cells or genes
+#' @param random_subset random subset to perform PCA on
+#' @param name arbitrary name for PCA run
+#' @param feats_to_use subset of features to use for PCA
+#' @param genes_to_use deprecated, use feats_to_use
+#' @param return_gobject boolean: return giotto object (default = TRUE)
+#' @param center center data first (default = TRUE)
+#' @param scale_unit scale features before PCA (default = TRUE)
+#' @param ncp number of principal components to calculate
+#' @param method which implementation to use
+#' @param method_params BiocParallelParam object
+#' @param rev do a reverse PCA
+#' @param set_seed use of seed
+#' @param seed_number seed number to use
+#' @param verbose verbosity of the function
+#' @param ... additional parameters for PCA (see details)
+#' @return giotto object with updated PCA dimension reduction
+#' @details See \code{\link[BiocSingular]{runPCA}} and \code{\link[FactoMineR]{PCA}} for more information about other parameters.
+#' This PCA implementation is similar to \code{\link{runPCA}}, except that it
+#' performs PCA on a subset of the cells or features, and predicts on the others.
+#' This can significantly increase speed without sacrificing accuracy too much.
+#' \itemize{
+#'   \item feats_to_use = NULL: will use all features from the selected matrix
+#'   \item feats_to_use = 'hvf': can be used to select a column name of
+#'   highly variable features, created by \code{\link{calculateHVF}}
+#'   \item feats_to_use = c('geneA', 'geneB', ...): will use all manually provided features
+#' }
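+#' @examples
+#' \dontrun{
+#' # minimal sketch (hypothetical object name `g`): fit PCA on a random subset
+#' # of 500 cells and project the remaining cells onto the learned components
+#' g = runPCAprojection(g, random_subset = 500, feats_to_use = 'hvf',
+#'                      name = 'pca.projection')
+#' }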
+#' @export
+runPCAprojection = function(gobject,
+                            spat_unit = NULL,
+                            feat_type = NULL,
+                            expression_values = c('normalized', 'scaled', 'custom'),
+                            reduction = c('cells', 'feats'),
+                            random_subset = 500,
+                            name = 'pca.projection',
+                            feats_to_use = 'hvf',
+                            return_gobject = TRUE,
+                            center = TRUE,
+                            scale_unit = TRUE,
+                            ncp = 100,
+                            method = c('irlba'),
+                            method_params = BiocParallel::SerialParam(),
+                            rev = FALSE,
+                            set_seed = TRUE,
+                            seed_number = 1234,
+                            verbose = TRUE,
+                            ...) {
+
+
+  # Set feat_type and spat_unit
+  spat_unit = set_default_spat_unit(gobject = gobject,
+                                    spat_unit = spat_unit)
+  feat_type = set_default_feat_type(gobject = gobject,
+                                    spat_unit = spat_unit,
+                                    feat_type = feat_type)
+
+  # specify name to use for pca
+  if(is.null(name)) {
+    if(feat_type == 'rna') {
+      name = 'pca'
+    } else {
+      name = paste0(feat_type,'.','pca')
+    }
+  }
+
+  # expression values to be used
+  values = match.arg(expression_values, unique(c('normalized', 'scaled', 'custom', expression_values)))
+  expr_values = get_expression_values(gobject = gobject,
+                                      feat_type = feat_type,
+                                      spat_unit = spat_unit,
+                                      values = values,
+                                      output = 'exprObj')
+
+  provenance = prov(expr_values)
+
+  if(!is.null(slot(gobject, 'h5_file'))) {
+    expr_path = slot(expr_values, 'exprMat')
+
+    expr_values = HDF5Array::h5mread(filepath = slot(gobject, 'h5_file'),
+                                     name = paste0('expression/',
+                                                   feat_type,'/',
+                                                   values))
+
+    expr_dimnames = HDF5Array::h5readDimnames(filepath = slot(gobject, 'h5_file'),
+                                              name = paste0('expression/',
+                                                            feat_type,'/',
+                                                            values))
+
+    rownames(expr_values) = expr_dimnames[[1]]
+    colnames(expr_values) = expr_dimnames[[2]]
+
+  } else {
+    expr_values = expr_values[] # extract matrix
+  }
+
+
+
+  ## subset matrix
+  if(!is.null(feats_to_use)) {
+    expr_values = create_feats_to_use_matrix(gobject = gobject,
+                                             spat_unit = spat_unit,
+                                             feat_type = feat_type,
+                                             sel_matrix = expr_values,
+                                             feats_to_use = feats_to_use,
+                                             verbose = verbose)
+  }
+
+
+  # do PCA dimension reduction
+  reduction = match.arg(reduction, c('cells', 'feats'))
+
+  # PCA implementation
+  method = match.arg(method, c('irlba'))
+
+  if(reduction == 'cells') {
+
+    # PCA on cells
+    pca_object = runPCA_BiocSingular_irlba_projection(x = t_flex(expr_values),
+                                                      ncp = ncp,
+                                                      center = center,
+                                                      scale = scale_unit,
+                                                      rev = rev,
+                                                      set_seed = set_seed,
+                                                      seed_number = seed_number,
+                                                      BPPARAM = method_params,
+                                                      random_subset = random_subset,
+                                                      verbose = verbose,
+                                                      ...)
+
+  } else {
+
+    # PCA on features
+    pca_object = runPCA_BiocSingular_irlba_projection(x = expr_values,
+                                                      ncp = ncp,
+                                                      center = center,
+                                                      scale = scale_unit,
+                                                      rev = rev,
+                                                      set_seed = set_seed,
+                                                      seed_number = seed_number,
+                                                      BPPARAM = method_params,
+                                                      random_subset = random_subset,
+                                                      verbose = verbose,
+                                                      ...)
+
+
+
+  }
+
+  if(return_gobject == TRUE) {
+
+    pca_names = list_dim_reductions_names(gobject = gobject,
+                                          data_type = reduction,
+                                          spat_unit = spat_unit,
+                                          feat_type = feat_type,
+                                          dim_type = 'pca')
+
+    if(name %in% pca_names) {
+      cat('\n ', name, ' has already been used, will be overwritten \n')
+    }
+
+    if (reduction == "cells") {
+      my_row_names = colnames(expr_values)
+    } else {
+      my_row_names = rownames(expr_values)
+    }
+
+    dimObject = create_dim_obj(name = name,
+                               feat_type = feat_type,
+                               spat_unit = spat_unit,
+                               provenance = provenance,
+                               reduction = reduction,
+                               reduction_method = 'pca',
+                               coordinates = pca_object$coords,
+                               misc = list(eigenvalues = pca_object$eigenvalues,
+                                           loadings = pca_object$loadings),
+                               my_rownames = my_row_names)
+
+    ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
+    gobject = set_dimReduction(gobject = gobject, dimObject = dimObject)
+    ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
+
+
+    ## update parameters used ##
+    gobject = update_giotto_params(gobject, description = '_pca')
+    return(gobject)
+
+
+
+  } else {
+    return(pca_object)
+  }
+
+
+}
+
+
+
+
+
+#' @title runPCAprojectionBatch
+#' @name runPCAprojectionBatch
+#' @description runs a Principal Component Analysis on multiple random batches + projection
+#' @param gobject giotto object
+#' @param spat_unit spatial unit
+#' @param feat_type feature type
+#' @param expression_values expression values to use
+#' @param reduction cells or genes
+#' @param random_subset random subset to perform PCA on
+#' @param batch_number number of random batches to run
+#' @param name arbitrary name for PCA run
+#' @param feats_to_use subset of features to use for PCA
+#' @param genes_to_use deprecated, use feats_to_use
+#' @param return_gobject boolean: return giotto object (default = TRUE)
+#' @param center center data first (default = TRUE)
+#' @param scale_unit scale features before PCA (default = TRUE)
+#' @param ncp number of principal components to calculate
+#' @param method which implementation to use
+#' @param method_params BiocParallelParam object
+#' @param rev do a reverse PCA
+#' @param set_seed use of seed
+#' @param seed_number seed number to use
+#' @param verbose verbosity of the function
+#' @param ... additional parameters for PCA (see details)
+#' @return giotto object with updated PCA dimension reduction
+#' @details See \code{\link[BiocSingular]{runPCA}} and \code{\link[FactoMineR]{PCA}} for more information about other parameters.
+#' This PCA implementation is similar to \code{\link{runPCA}} and \code{\link{runPCAprojection}},
+#' except that it performs PCA on multiple subsets (batches) of the cells or features,
+#' and predicts on the others. This can significantly increase speed without sacrificing accuracy too much.
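+#' A minimal usage sketch (hypothetical object name `g`, assuming normalized
+#' expression and HVF results are already present):
+#' \preformatted{
+#' g = runPCAprojectionBatch(g, random_subset = 500, batch_number = 5,
+#'                           name = 'pca.projection.batch')
+#' }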
+#' \itemize{ +#' \item feats_to_use = NULL: will use all features from the selected matrix +#' \item feats_to_use = : can be used to select a column name of +#' highly variable features, created by (see \code{\link{calculateHVF}}) +#' \item feats_to_use = c('geneA', 'geneB', ...): will use all manually provided features +#' } +#' @export +runPCAprojectionBatch = function(gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c('normalized', 'scaled', 'custom'), + reduction = c('cells', 'feats'), + random_subset = 500, + batch_number = 5, + name = 'pca.projection.batch', + feats_to_use = 'hvf', + return_gobject = TRUE, + center = TRUE, + scale_unit = TRUE, + ncp = 100, + method = c('irlba'), + method_params = BiocParallel::SerialParam(), + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + ...) { + + + # Set feat_type and spat_unit + spat_unit = set_default_spat_unit(gobject = gobject, + spat_unit = spat_unit) + feat_type = set_default_feat_type(gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type) + + # specify name to use for pca + if(is.null(name)) { + if(feat_type == 'rna') { + name = 'pca' + } else { + name = paste0(feat_type,'.','pca') + } + } + + # expression values to be used + values = match.arg(expression_values, unique(c('normalized', 'scaled', 'custom', expression_values))) + expr_values = get_expression_values(gobject = gobject, + feat_type = feat_type, + spat_unit = spat_unit, + values = values, + output = 'exprObj') + + provenance = prov(expr_values) + + if(!is.null(slot(gobject, 'h5_file'))) { + expr_path = slot(expr_values, 'exprMat') + + expr_values = HDF5Array::h5mread(filepath = slot(gobject, 'h5_file'), + name = paste0('expression/', + feat_type,'/', + values)) + + expr_dimnames = HDF5Array::h5readDimnames(filepath = slot(gobject, 'h5_file'), + name = paste0('expression/', + feat_type,'/', + values)) + + rownames(expr_values) = expr_dimnames[[1]] + colnames(expr_values) = expr_dimnames[[2]] + + } else { + expr_values = expr_values[] # extract matrix + } + + + + ## subset matrix + if(!is.null(feats_to_use)) { + expr_values = create_feats_to_use_matrix(gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + sel_matrix = expr_values, + feats_to_use = feats_to_use, + verbose = verbose) + } + + + # do PCA dimension reduction + reduction = match.arg(reduction, c('cells', 'feats')) + + # PCA implementation + method = match.arg(method, c('irlba')) + + if(reduction == 'cells') { + + pca_batch_results = list() + + for(batch in 1:batch_number) { + + if(verbose) wrap_msg('start batch ', batch) + + if(isTRUE(set_seed)) { + seed_batch = seed_number+batch + } else { + seed_batch = NULL + } + + + # PCA on cells + pca_object = runPCA_BiocSingular_irlba_projection(x = t_flex(expr_values), + ncp = ncp, + center = center, + scale = scale_unit, + rev = rev, + set_seed = set_seed, + seed_number = seed_batch, + BPPARAM = method_params, + random_subset = random_subset, + verbose = verbose, + ...) 
+
+      # adjust the sign of the coordinates and loadings vector relative to first batch
+      # this is necessary for the next averaging step
+      if(batch == 1) {
+        pca_batch_results[[batch]] = pca_object
+      } else {
+
+        for(dimension in 1:ncol(pca_object[['coords']])) {
+          sum_evaluation = sum(sign(pca_batch_results[[1]][['coords']][1:20, dimension]) *
+                                 sign(pca_object[['coords']][1:20, dimension]))
+          if(sum_evaluation < 0) {
+            pca_object$coords[, dimension] = -1 * pca_object$coords[, dimension]
+            pca_object$loadings[, dimension] = -1 * pca_object$loadings[, dimension]
+          }
+        }
+        pca_batch_results[[batch]] = pca_object
+      }
+    }
+
+    if(verbose) wrap_msg('start averaging pca results of batches')
+
+    # calculate average eigenvalues, coordinates and loadings
+    # TODO: test out DT approach, might be faster and more efficient for
+    # large datasets
+
+    # eigenvalues
+    eigenvalues_list = lapply(pca_batch_results, FUN = function(x) x$eigenvalues)
+    eigenvalues_matrix = do.call('cbind', eigenvalues_list)
+    eigenvalues_mean = rowMeans_flex(eigenvalues_matrix)
+
+    # coordinates
+    coords_list = lapply(pca_batch_results, FUN = function(x) x$coords)
+    coords_vector = do.call('c', coords_list)
+    coords_array = array(data = coords_vector, dim = c(ncol(expr_values), ncp, length(pca_batch_results)))
+    coords_all = apply(coords_array, MARGIN = c(1:2), function(arr){mean(arr, na.rm=TRUE)})
+    rownames(coords_all) = rownames(pca_batch_results[[1]][['coords']])
+    colnames(coords_all) = colnames(pca_batch_results[[1]][['coords']])
+
+    # loadings
+    loadings_list = lapply(pca_batch_results, FUN = function(x) x$loadings)
+    loadings_vector = do.call('c', loadings_list)
+    loadings_array = array(data = loadings_vector, dim = c(nrow(expr_values), ncp, length(pca_batch_results)))
+    loadings_all = apply(loadings_array, MARGIN = c(1:2), function(arr){mean(arr, na.rm=TRUE)})
+    rownames(loadings_all) = rownames(pca_batch_results[[1]][['loadings']])
+    colnames(loadings_all) = colnames(pca_batch_results[[1]][['loadings']])
+
+
+    pca_object = list(eigenvalues = eigenvalues_mean, loadings = loadings_all, coords = coords_all)
+
+
+  } else {
+
+
+    pca_batch_results = list()
+
+    for(batch in 1:batch_number) {
+
+      if(verbose) wrap_msg('start batch ', batch)
+
+      if(isTRUE(set_seed)) {
+        seed_batch = seed_number+batch
+      } else {
+        seed_batch = NULL
+      }
+
+
+      # PCA on features
+      pca_object = runPCA_BiocSingular_irlba_projection(x = expr_values,
+                                                        ncp = ncp,
+                                                        center = center,
+                                                        scale = scale_unit,
+                                                        rev = rev,
+                                                        set_seed = set_seed,
+                                                        seed_number = seed_batch,
+                                                        BPPARAM = method_params,
+                                                        random_subset = random_subset,
+                                                        verbose = verbose,
+                                                        ...)
+ + + # adjust the sign of the coordinates and loadings vector relative to first batch + # this is necessary for the next averaging step + if(batch == 1) { + pca_batch_results[[batch]] = pca_object + } else { + + for(dimension in 1:ncol(pca_object[['coords']])) { + sum_evaluation = sum(sign(pca_batch_results[[1]][['coords']][1:20, dimension]) * + sign(pca_object[['coords']][1:20, dimension])) + if(sum_evaluation < 0) { + pca_object$coords[, dimension] = -1 * pca_object$coords[, dimension] + pca_object$loadings[, dimension] = -1 * pca_object$loadings[, dimension] + } + } + pca_batch_results[[batch]] = pca_object + } } + if(verbose) wrap_msg('start averaging pca results of batches') + + # calculate average eigenvalues, coordinates and loadings + # TODO: test out DT approach, might be faster and more efficient for + # large datasets + + # eigenvalues + eigenvalues_list = lapply(pca_batch_results, FUN = function(x) x$eigenvalues) + eigenvalues_matrix = do.call('cbind', eigenvalues_list) + eigenvalues_mean = rowMeans_flex(eigenvalues_matrix) + + # coordinates + coords_list = lapply(pca_batch_results, FUN = function(x) x$coords) + coords_vector = do.call('c', coords_list) + coords_array = array(data = coords_vector, dim = c(ncol(expr_values), ncp, length(pca_batch_results))) + coords_all = apply(coords_array, MARGIN = c(1:2), function(arr){mean(arr, na.rm=TRUE)}) + rownames(coords_all) = rownames(pca_batch_results[[1]][['coords']]) + colnames(coords_all) = colnames(pca_batch_results[[1]][['coords']]) + + # loadings + loadings_list = lapply(pca_batch_results, FUN = function(x) x$loadings) + loadings_vector = do.call('c', loadings_list) + loadings_array = array(data = loadings_vector, dim = c(nrow(expr_values), ncp, length(pca_batch_results))) + loadings_all = apply(loadings_array, MARGIN = c(1:2), function(arr){mean(arr, na.rm=TRUE)}) + rownames(loadings_all) = rownames(pca_batch_results[[1]][['loadings']]) + colnames(loadings_all) = colnames(pca_batch_results[[1]][['loadings']]) + + + pca_object = list(eigenvalues = eigenvalues_mean, loadings = loadings_all, coords = coords_all) + + + } - - print("finished runPCA_factominer, method == factominer") if(return_gobject == TRUE) { @@ -623,12 +1336,16 @@ runPCA <- function(gobject, return(gobject) + } else { return(pca_object) } + + } + ## * PC estimates #### # ------------------ # @@ -1384,26 +2101,24 @@ runUMAP <- function(gobject, feat_type = feat_type, values = values, output = 'exprObj') - - + + provenance = prov(expr_values) + if(!is.null(slot(gobject, 'h5_file'))) { expr_path = slot(expr_values, 'exprMat') - + expr_values = HDF5Array::h5mread(filepath = slot(gobject, 'h5_file'), name = expr_path) - + expr_dimnames = HDF5Array::h5readDimnames(filepath = slot(gobject, 'h5_file'), name = expr_path) - + rownames(expr_values) = expr_dimnames[[1]] colnames(expr_values) = expr_dimnames[[2]] } else { - - provenance = prov(expr_values) expr_values = expr_values[] # extract matrix - } - + ## subset matrix if(!is.null(feats_to_use)) { expr_values = create_feats_to_use_matrix(gobject = gobject, @@ -1500,6 +2215,273 @@ runUMAP <- function(gobject, } + +#' @title Run UMAP dimension reduction +#' @name runUMAPprojection +#' @description run UMAP on subset and project on the rest +#' @param gobject giotto object +#' @param spat_unit spatial unit +#' @param feat_type feature type +#' @param expression_values expression values to use +#' @param reduction cells or genes +#' @param dim_reduction_to_use use another dimension reduction set as input +#' @param 
dim_reduction_name name of dimension reduction set to use +#' @param dimensions_to_use number of dimensions to use as input +#' @param random_subset random subset to perform UMAP on +#' @param name arbitrary name for UMAP run +#' @param feats_to_use if dim_reduction_to_use = NULL, which genes to use +#' @param genes_to_use deprecated, use feats_to_use +#' @param return_gobject boolean: return giotto object (default = TRUE) +#' @param n_neighbors UMAP param: number of neighbors +#' @param n_components UMAP param: number of components +#' @param n_epochs UMAP param: number of epochs +#' @param min_dist UMAP param: minimum distance +#' @param n_threads UMAP param: threads/cores to use +#' @param spread UMAP param: spread +#' @param set_seed use of seed +#' @param seed_number seed number to use +#' @param verbose verbosity of function +#' @param toplevel_params parameters to extract +#' @param ... additional UMAP parameters +#' @return giotto object with updated UMAP dimension reduction +#' @details See \code{\link[uwot]{umap}} for more information about these and other parameters. +#' \itemize{ +#' \item Input for UMAP dimension reduction can be another dimension reduction (default = 'pca') +#' \item To use gene expression as input set dim_reduction_to_use = NULL +#' \item If dim_reduction_to_use = NULL, genes_to_use can be used to select a column name of +#' highly variable genes (see \code{\link{calculateHVF}}) or simply provide a vector of genes +#' \item multiple UMAP results can be stored by changing the \emph{name} of the analysis +#' } +#' @export +runUMAPprojection = function(gobject, + feat_type = NULL, + spat_unit = NULL, + expression_values = c('normalized', 'scaled', 'custom'), + reduction = c('cells', 'feats'), + dim_reduction_to_use = 'pca', + dim_reduction_name = NULL, + dimensions_to_use = 1:10, + random_subset = 500, + name = NULL, + feats_to_use = NULL, + return_gobject = TRUE, + n_neighbors = 40, + n_components = 2, + n_epochs = 400, + min_dist = 0.01, + n_threads = NA, + spread = 5, + set_seed = TRUE, + seed_number = 1234, + verbose = T, + toplevel_params = 2, + ...) 
{ + + + # Set feat_type and spat_unit + spat_unit = set_default_spat_unit(gobject = gobject, + spat_unit = spat_unit) + feat_type = set_default_feat_type(gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type) + + reduction = match.arg(reduction, choices = c('cells', 'feats')) + + + # specify dim_reduction_name to use for pca input for umap + if(!is.null(dim_reduction_to_use)) { + if(dim_reduction_to_use == 'pca') { + if(is.null(dim_reduction_name)) { + if(feat_type == 'rna') { + dim_reduction_name = 'pca' + } else { + dim_reduction_name = paste0(feat_type,'.','pca') + } + } + } + } + + + + # specify name to use for umap + if(is.null(name)) { + if(feat_type == 'rna') { + name = 'umap.projection' + } else { + name = paste0(feat_type,'.','umap.projection') + } + } + + + # set cores to use + n_threads = determine_cores(cores = n_threads) + + ## umap on cells ## + if(reduction == 'cells') { + + ## using dimension reduction ## + if(!is.null(dim_reduction_to_use)) { + + ## TODO: check if reduction exists + dimObj_to_use = get_dimReduction(gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + reduction = reduction, + reduction_method = dim_reduction_to_use, + name = dim_reduction_name, + output = 'dimObj') + + provenance = prov(dimObj_to_use) + matrix_to_use = dimObj_to_use[] + + matrix_to_use = matrix_to_use[, dimensions_to_use] + + + + #matrix_to_use = gobject@dimension_reduction[['cells']][[dim_reduction_to_use]][[dim_reduction_name]][['coordinates']][, dimensions_to_use] + + } else { + + ## using original matrix ## + # expression values to be used + values = match.arg(expression_values, unique(c('normalized', 'scaled', 'custom', expression_values))) + + expr_values = get_expression_values(gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + values = values, + output = 'exprObj') + + provenance = prov(expr_values) + + if(!is.null(slot(gobject, 'h5_file'))) { + expr_path = slot(expr_values, 'exprMat') + + expr_values = HDF5Array::h5mread(filepath = slot(gobject, 'h5_file'), + name = expr_path) + + expr_dimnames = HDF5Array::h5readDimnames(filepath = slot(gobject, 'h5_file'), + name = expr_path) + + rownames(expr_values) = expr_dimnames[[1]] + colnames(expr_values) = expr_dimnames[[2]] + } else { + expr_values = expr_values[] # extract matrix + } + + ## subset matrix + if(!is.null(feats_to_use)) { + expr_values = create_feats_to_use_matrix(gobject = gobject, + spat_unit = spat_unit, + feat_type = feat_type, + sel_matrix = expr_values, + feats_to_use = feats_to_use, + verbose = verbose) + } + + matrix_to_use = t_flex(expr_values) + } + + # start seed + if(isTRUE(set_seed)) { + set.seed(seed = seed_number) + } + + + ## run umap ## + cell_ID_order = rownames(matrix_to_use) + + # create random selection + random_selection = sort(sample(1:nrow(matrix_to_use), random_subset)) + subsample_matrix = matrix_to_use[random_selection, ] + + uwot_clus_subset <- uwot::umap(X = subsample_matrix, + n_neighbors = n_neighbors, + n_components = n_components, + n_epochs = n_epochs, + min_dist = min_dist, + n_threads = n_threads, + spread = spread, + ret_model = TRUE, + ...) 
+ + # create leftover matrix + leftover_matrix = matrix_to_use[-random_selection, ] + + # make prediction on leftover matrix + uwot_clus_pred = uwot::umap_transform(X = leftover_matrix, model = uwot_clus_subset) + + # combine subset and prediction + coords_umap = rbind(uwot_clus_subset$embedding, uwot_clus_pred) + coords_umap = coords_umap[match(cell_ID_order, rownames(coords_umap)), ] + + # data.table variables + coords_umap_DT = data.table::as.data.table(coords_umap) + cell_ID = NULL + coords_umap_DT[, cell_ID := rownames(coords_umap)] + + # exit seed + if(isTRUE(set_seed)) { + set.seed(seed = Sys.time()) + } + + + + } else if(reduction == 'feats') { + message('\n Feats reduction is not yet implemented \n') + } + + + + if(return_gobject == TRUE) { + + umap_names = list_dim_reductions_names(gobject = gobject, + data_type = reduction, + spat_unit = spat_unit, + feat_type = feat_type, + dim_type = 'umap') + + if(name %in% umap_names) { + message('\n ', name, ' has already been used, will be overwritten \n') + } + + + coordinates = coords_umap + + dimObject = create_dim_obj(name = name, + feat_type = feat_type, + spat_unit = spat_unit, + reduction = reduction, + provenance = provenance, + reduction_method = 'umap', + coordinates = coordinates, + misc = NULL) + + + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + gobject = set_dimReduction(gobject = gobject, dimObject = dimObject) + ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + + + ## update parameters used ## + gobject = update_giotto_params(gobject, + description = '_umap', + return_gobject = TRUE, + toplevel = toplevel_params) + return(gobject) + + + } else { + return(coords_umap_DT) + } + + + +} + + + #' @title Run tSNE dimensional reduction #' @name runtSNE #' @description run tSNE diff --git a/R/general_help.R b/R/general_help.R index a074c2535..5331fab80 100644 --- a/R/general_help.R +++ b/R/general_help.R @@ -1046,19 +1046,33 @@ loadGiotto = function(path_to_folder, gobject_file = list.files(path_to_folder, pattern = 'gobject') - if(grepl('.RDS', x = gobject_file)) { - gobject = do.call('readRDS', c(file = paste0(path_to_folder,'/','gobject.RDS'), load_params)) - } + if(identical(gobject_file, character(0))) { + + if(verbose) wrap_msg('giotto object was not found, skip loading giotto object \n') + + } else if(length(gobject_file) > 1) { + + if(verbose) wrap_msg('more than 1 giotto object was found, skip loading giotto object \n') + + } else { + + if(grepl('.RDS', x = gobject_file)) { + gobject = do.call('readRDS', c(file = paste0(path_to_folder,'/','gobject.RDS'), load_params)) + } + + if(grepl('.qs', x = gobject_file)) { + package_check(pkg_name = 'qs', repository = 'CRAN') + qread_fun = get("qread", asNamespace("qs")) + gobject = do.call(qread_fun, c(file = paste0(path_to_folder,'/','gobject.qs'), load_params)) + } - if(grepl('.qs', x = gobject_file)) { - package_check(pkg_name = 'qs', repository = 'CRAN') - qread_fun = get("qread", asNamespace("qs")) - gobject = do.call(qread_fun, c(file = paste0(path_to_folder,'/','gobject.qs'), load_params)) + } + + ## 2. read in features if(verbose) wrap_msg('2. read Giotto feature information \n') feat_files = list.files(path = paste0(path_to_folder, '/Features'), pattern = '.shp') @@ -1084,6 +1098,7 @@ loadGiotto = function(path_to_folder, } + ## 3. read in spatial polygons if(isTRUE(verbose)) wrap_msg('3.
read Giotto spatial information \n') spat_paths = list.files(path = paste0(path_to_folder, '/SpatialInfo'), pattern = 'spatVector.shp', full.names = TRUE) @@ -1114,49 +1129,60 @@ loadGiotto = function(path_to_folder, } ## 3.2. centroids - centroid_search_term = gsub(spat_files, pattern = '_spatInfo_spatVector.shp', replacement = '_spatInfo_spatVectorCentroids.shp') + if(isTRUE(verbose)) { + wrap_msg('\n 3.2 read Giotto spatial centroid information \n') + } + centroid_search_term = gsub(spat_files, pattern = '_spatInfo_spatVector.shp', replacement = '_spatInfo_spatVectorCentroids.shp') centroid_paths = sapply(centroid_search_term, function(gp_centroid) { list.files(path = paste0(path_to_folder, '/SpatialInfo'), pattern = gp_centroid, full.names = TRUE) }, USE.NAMES = FALSE) - centroid_files = basename(centroid_paths) + # check if centroid are provided for spatvector polygons + test_missing = unlist(lapply(centroid_paths, FUN = function(x) identical(x, character(0)))) + centroid_paths = centroid_paths[!test_missing] - if(isTRUE(verbose)) { - wrap_msg('\n3.2 read Giotto spatial centroid information \n') - print(centroid_files) - } + if(length(centroid_paths) == 0) { + if(verbose) wrap_msg('No centroids were found, centroid loading will be skipped \n') + } else { - if(length(centroid_files != 0)) { - spat_names = gsub(centroid_files, pattern = '_spatInfo_spatVectorCentroids.shp', replacement = '') + centroid_files = basename(centroid_paths) - vector_names_paths = list.files(path = paste0(path_to_folder, '/SpatialInfo'), pattern = 'spatVectorCentroids_names.txt', full.names = TRUE) + if(length(centroid_files != 0)) { + spat_names = gsub(centroid_files, pattern = '_spatInfo_spatVectorCentroids.shp', replacement = '') - for(spat_i in 1:length(spat_names)) { - spatVector = terra::vect(x = centroid_paths[spat_i]) + vector_names_paths = list.files(path = paste0(path_to_folder, '/SpatialInfo'), pattern = 'spatVectorCentroids_names.txt', full.names = TRUE) - # read in original column names and assign to spatVector - spatVector_names = fread(input = vector_names_paths[spat_i], header = FALSE)[['V1']] - names(spatVector) = spatVector_names + for(spat_i in 1:length(spat_names)) { + spatVector = terra::vect(x = centroid_paths[spat_i]) - spat_name = spat_names[spat_i] - if(isTRUE(verbose)) message(spat_name) - gobject@spatial_info[[spat_name]]@spatVectorCentroids = spatVector + # read in original column names and assign to spatVector + spatVector_names = fread(input = vector_names_paths[spat_i], header = FALSE)[['V1']] + names(spatVector) = spatVector_names + + spat_name = spat_names[spat_i] + if(isTRUE(verbose)) message(spat_name) + gobject@spatial_info[[spat_name]]@spatVectorCentroids = spatVector + } } + } ## 3.3. 
overlaps - overlap_search_term = gsub(spat_files, pattern = '_spatInfo_spatVector.shp', replacement = '_spatInfo_spatVectorOverlaps.shp') - overlap_files = list.files(path = paste0(path_to_folder, '/SpatialInfo'), pattern = 'spatVectorOverlaps.shp') - if(isTRUE(verbose)) { wrap_msg('\n3.3 read Giotto spatial overlap information \n') - print(overlap_files) } - if(length(overlap_files != 0)) { + overlap_search_term = gsub(spat_files, pattern = '_spatInfo_spatVector.shp', replacement = '_spatInfo_spatVectorOverlaps.shp') + overlap_files = list.files(path = paste0(path_to_folder, '/SpatialInfo'), pattern = 'spatVectorOverlaps.shp') + # check if overlap information is available + if(length(overlap_files) == 0) { + if(verbose) wrap_msg('No overlaps were found, overlap loading will be skipped \n') + } else { + + print(overlap_files) # find overlaps per spatVector for(sv_i in seq_along(overlap_search_term)) { @@ -1187,7 +1213,6 @@ loadGiotto = function(path_to_folder, } } } - } diff --git a/R/giotto_structures.R b/R/giotto_structures.R index ff37060e2..941ec658d 100644 --- a/R/giotto_structures.R +++ b/R/giotto_structures.R @@ -2598,8 +2598,8 @@ combineFeatureData = function(gobject, feat_type = feat, output = 'data.table') - if(!is.null(sel_feats[[feat_type]])) { - selected_features = sel_feats[[feat_type]] + if(!is.null(sel_feats[[feat]])) { + selected_features = sel_feats[[feat]] feat_meta = feat_meta[feat_ID %in% selected_features] } @@ -2609,8 +2609,8 @@ combineFeatureData = function(gobject, feat_type = feat, return_giottoPoints = FALSE) feat_info = spatVector_to_dt(feat_info_spatvec) - if(!is.null(sel_feats[[feat_type]])) { - selected_features = sel_feats[[feat_type]] + if(!is.null(sel_feats[[feat]])) { + selected_features = sel_feats[[feat]] feat_info = feat_info[feat_ID %in% selected_features] } @@ -2672,8 +2672,8 @@ combineFeatureOverlapData = function(gobject, - if(!is.null(sel_feats[[feat_type]])) { - selected_features = sel_feats[[feat_type]] + if(!is.null(sel_feats[[feat]])) { + selected_features = sel_feats[[feat]] feat_meta = feat_meta[feat_ID %in% selected_features] } @@ -2685,8 +2685,8 @@ combineFeatureOverlapData = function(gobject, polygon_overlap = feat) feat_overlap_info = spatVector_to_dt(feat_overlap_info_spatvec) - if(!is.null(sel_feats[[feat_type]])) { - selected_features = sel_feats[[feat_type]] + if(!is.null(sel_feats[[feat]])) { + selected_features = sel_feats[[feat]] feat_overlap_info = feat_overlap_info[feat_ID %in% selected_features] } @@ -2694,7 +2694,7 @@ combineFeatureOverlapData = function(gobject, poly_list[[poly]] = feat_overlap_info } - poly_list_res = do.call('rbind', poly_list) + poly_list_res = rbindlist(poly_list,fill = T) comb_dt = data.table::merge.data.table(x = feat_meta, y = poly_list_res, diff --git a/R/images.R b/R/images.R index d44e9d336..fc9d3b940 100644 --- a/R/images.R +++ b/R/images.R @@ -1545,9 +1545,14 @@ plot_giottoLargeImage = function(gobject = NULL, if(isTRUE(asRGB) | terra::has.RGB(raster_object) | terra::nlyr(raster_object) >= 3) { # Determine likely image bitdepth if(is.null(max_intensity)) { + bitDepth = ceiling(log(x = giottoLargeImage@max_intensity, base = 2)) # Assign discovered bitdepth as max_intensity max_intensity = 2^bitDepth-1 + + if(max_intensity == 0){ + max_intensity = 1 + } } terra::plotRGB(raster_object, diff --git a/R/interoperability.R b/R/interoperability.R index 4333c1ef5..0bd70905c 100644 --- a/R/interoperability.R +++ b/R/interoperability.R @@ -20,56 +20,67 @@ gefToGiotto = function(gef_file, bin_size = 'bin100', 
verbose = FALSE){ - # data.table vars - genes = y = sdimx = sdimy = cell_ID = count = NULL - - # package check - package_check(pkg_name = 'rhdf5', repository = 'Bioc') - if(!file.exists(gef_file)) stop('File path to .gef file does not exist') - - # check if proper bin_size is selected. These are determined in SAW pipeline. - bin_size_options = c('bin1', 'bin10', 'bin20', 'bin50', 'bin100', 'bin200') - if(!(bin_size %in% bin_size_options)) stop('Please select valid bin size,see details for choices.') - - # step 1: read expression and gene data from gef file - if(isTRUE(verbose)) wrap_msg('reading in .gef file') - geneExpData = rhdf5::h5read(file = gef_file, name = 'geneExp') - exprDT = data.table::as.data.table(geneExpData[[bin_size]][['expression']]) - geneDT = data.table::as.data.table(geneExpData[[bin_size]][['gene']]) - - # step 2: combine gene information from the geneDT to the exprDT - exprDT[, genes := rep(x = geneDT$gene, geneDT$count)] - - # step 3: bin coordinates according to selected bin_size - #TODO: update bin_shift for other shapes, not just rect_vertices - bin_size_int = as.integer(gsub("[^0-9.-]", "", bin_size)) - bin_shift = ceiling(bin_size_int / 2) # ceiling catches bin_1 - bincoord = unique(exprDT[,.(x,y)]) - if(isTRUE(verbose)) wrap_msg('shifting and binning coordinates') - data.table::setorder(bincoord, x, y) - data.table::setnames(bincoord, old = c('x', 'y'), new = c('sdimx', 'sdimy')) - bincoord[, c('sdimx', 'sdimy') := list(sdimx+bin_shift, sdimy+bin_shift)] - bincoord[, cell_ID := paste0('bin', 1:.N)] - tx_data = exprDT[,.(genes, x, y, count)] - tx_data[, c('x', 'y') := list(x+bin_shift, y+bin_shift)] - - # step 4: create rectangular polygons (grid) starting from the bin centroids - if(isTRUE(verbose)) wrap_msg('creating polygon stamp') - x = polyStamp(stamp_dt = rectVertices(dims = c(x = (bin_size_int - 1), - y = (bin_size_int - 1))), - spatlocs = bincoord[,.(cell_ID, sdimx, sdimy)]) - pg = createGiottoPolygonsFromDfr(x) - - # step 5: create giotto subcellular object - stereo = createGiottoObjectSubcellular( - gpoints = list(rna = tx_data), - gpolygons = list(cell = pg) - ) - - stereo = addSpatialCentroidLocations(gobject = stereo) - if(isTRUE(verbose)) wrap_msg('giotto subcellular object created') - - return(stereo) + # data.table vars + genes = y = sdimx = sdimy = cell_ID = count = NULL + + # package check + package_check(pkg_name = 'rhdf5', repository = 'Bioc') + if(!file.exists(gef_file)) stop('File path to .gef file does not exist') + + # check if proper bin_size is selected. These are determined in SAW pipeline. + wrap_msg('1. gefToGiotto() begin... \n') + bin_size_options = c('bin1', 'bin10', 'bin20', 'bin50', 'bin100', 'bin200') + if(!(bin_size %in% bin_size_options)){ + stop('Please select valid bin size, see ?gefToGiotto for details.') + } + + # 1. read .gef file at specific bin size + geneExpData = rhdf5::h5read(file = gef_file, name = paste0('geneExp/', + bin_size)) + geneDT = data.table::as.data.table(geneExpData[['gene']]) + + exprDT = data.table::as.data.table(geneExpData[['expression']]) + exprDT$count = as.integer(exprDT$count) + if(isTRUE(verbose)) wrap_msg('finished reading in .gef', bin_size, '\n') + + # 2. create spatial locations + if(isTRUE(verbose)) wrap_msg('2. create spatial_locations... 
\n') + cell_locations = unique(exprDT[,c('x','y')], by = c('x', 'y')) + cell_locations[, bin_ID := as.factor(seq_len(nrow(cell_locations)))] + cell_locations[, cell_ID := paste0('cell_', bin_ID)] + data.table::setcolorder(cell_locations, c('x', 'y', 'cell_ID', 'bin_ID')) # ensure first non-numerical col is cell_ID + if(isTRUE(verbose)) wrap_msg(nrow(cell_locations), ' bins in total \n') + if(isTRUE(verbose)) wrap_msg('finished spatial_locations \n') + + # 3. create expression matrix + if(isTRUE(verbose)) wrap_msg('3. create expression matrix... \n') + exprDT[, genes := as.character(rep(x = geneDT$gene, geneDT$count))] + exprDT[, gene_idx := as.integer(factor(exprDT$genes, + levels = unique(exprDT$genes)))] + + # merge on x,y and populate based on bin_ID values in cell_locations + exprDT[cell_locations, cell_ID := i.bin_ID, on = .(x, y)] + exprDT$cell_ID <- as.integer(exprDT$cell_ID) + + expMatrix <- Matrix::sparseMatrix(i = exprDT$gene_idx, + j = exprDT$cell_ID, + x = exprDT$count) + + colnames(expMatrix) = cell_locations$cell_ID + rownames(expMatrix) = unique(exprDT, by = c("genes", "gene_idx"))$genes + if(isTRUE(verbose)) wrap_msg('finished expression matrix') + + # 4. create minimal giotto object + if(isTRUE(verbose)) wrap_msg('4. create giotto object... \n') + stereo = createGiottoObject( + expression = expMatrix, + spatial_locs = cell_locations, + verbose = F + ) + if(isTRUE(verbose)) wrap_msg('finished giotto object... \n') + + wrap_msg('gefToGiotto() finished \n') + return(stereo) } @@ -144,6 +155,7 @@ check_py_for_scanpy = function(){ #' @param spat_unit desired spatial unit for conversion, default NULL #' @param feat_type desired feature type for conversion, default NULL #' @param python_path path to python executable within a conda/miniconda environment +#' @param env_name name of environment containing python_path executable #' @return Giotto object #' @details Function in beta. Converts a .h5ad file into a Giotto object. #' The returned Giotto Object will take default instructions with the @@ -156,7 +168,8 @@ anndataToGiotto = function(anndata_path = NULL, deluanay_spat_net = TRUE, spat_unit = NULL, feat_type = NULL, - python_path = NULL) { + python_path = NULL, + env_name = "giotto_env") { # Preliminary file checks and guard clauses if (is.null(anndata_path)) { @@ -177,7 +190,8 @@ anndataToGiotto = function(anndata_path = NULL, # Required step to properly initialize reticulate instrs = createGiottoInstructions(python_path = python_path) - check_py_for_scanpy() + scanpy_installed = checkPythonPackage("scanpy", env_to_use = env_name) + # should trigger a stop() downstream if not installed # Import ad2g, a python module for parsing anndata ad2g_path <- system.file("python","ad2g.py",package="Giotto") @@ -472,6 +486,7 @@ anndataToGiotto = function(anndata_path = NULL, #' @param spat_unit spatial unit which will be used in conversion. #' @param feat_type feature type which will be used in conversion. #' @param python_path path to python executable within a conda/miniconda environment +#' @param env_name name of environment containing python_path executable #' @param save_directory directory in which the file will be saved. #' @return vector containing .h5ad file path(s) #' @details Function in beta. Converts a Giotto object into .h5ad file(s).
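A minimal usage sketch for the two converters above, assuming a conda environment named "giotto_env" that has scanpy available ("sample.h5ad" is a placeholder path):

```r
library(Giotto)

# .h5ad -> Giotto; checkPythonPackage() verifies scanpy inside the named environment
g <- anndataToGiotto(anndata_path = "sample.h5ad", env_name = "giotto_env")

# Giotto -> .h5ad; returns the path(s) of the written file(s)
fpaths <- giottoToAnnData(gobject = g, save_directory = tempdir(), env_name = "giotto_env")
```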
@@ -495,12 +510,15 @@ giottoToAnnData <- function(gobject = NULL, spat_unit = NULL, feat_type = NULL, python_path = NULL, + env_name = "giotto_env", save_directory = NULL){ # Check gobject invalid_obj = !("giotto" %in% class(gobject)) if (is.null(gobject) || invalid_obj) { stop(wrap_msg("Please provide a valid Giotto Object for conversion.")) } + + scanpy_installed = checkPythonPackage("scanpy", env_to_use = env_name) # Python module import g2ad_path <- system.file("python","g2ad.py",package="Giotto") @@ -967,13 +985,10 @@ giottoToAnnData <- function(gobject = NULL, return(fname_list) } - - ## Seurat object #### - -#' @title Convert Giotto to Seurat -#' @name giottoToSeurat +#' @title Convert Giotto to Seurat V4 +#' @name giottoToSeuratV4 #' @description Converts Giotto object into a Seurat object. This function extracts #' specific sets of data belonging to specified spatial unit. #' The default values are 'cell' and 'rna' respectively. @@ -983,27 +998,26 @@ giottoToAnnData <- function(gobject = NULL, #' @param ... additional params to pass to \code{\link{get_spatial_locations}} #' @return Seurat object #' @export -giottoToSeurat <- function(gobject, +giottoToSeuratV4 <- function(gobject, spat_unit = NULL, obj_use = NULL, ...){ - + + # To use Seurat v4 assays + options(Seurat.object.assay.version = 'v4') + # data.table vars feat_type = name = dim_type = nn_type = NULL - if(!is.null(obj_use)) { warning('obj_use param is deprecated. Please use "gobject"') gobject = obj_use } - # set default spat_unit and feat_type to be extracted as a Seurat assay spat_unit = set_default_spat_unit(gobject = gobject, spat_unit = spat_unit) - # verify if optional package is installed package_check(pkg_name = "Seurat", repository = "CRAN") requireNamespace('Seurat') - # check whether any raw data exist -- required for Seurat avail_expr = list_expression(gobject = gobject, spat_unit = spat_unit) raw_exist = avail_expr[, 'raw' %in% name, by = feat_type] @@ -1016,7 +1030,6 @@ giottoToSeurat <- function(gobject, } else { stop("Raw count data not found.
Required for Seurat object.") } - # create Seurat object when at least one raw data is available for (i in seq_along(assays_all)){ assay_use <- assays_all[i] @@ -1074,7 +1087,6 @@ giottoToSeurat <- function(gobject, assay = assay_use) } } - # add cell metadata meta_cells <- data.table::setDF( get_cell_metadata( @@ -1091,7 +1103,6 @@ giottoToSeurat <- function(gobject, colnames(meta_cells) <- paste0(assay_use,"_",colnames(meta_cells)) } sobj <- Seurat::AddMetaData(sobj,metadata = meta_cells[Seurat::Cells(sobj),]) - # add feature metadata meta_genes <- data.table::setDF( get_feature_metadata( @@ -1103,12 +1114,17 @@ giottoToSeurat <- function(gobject, ) ) rownames(meta_genes) <- meta_genes$feat_ID - sobj[[assay_use]]@meta.features <- cbind(sobj[[assay_use]]@meta.features,meta_genes) - - + if ("meta.data" %in% slotNames(sobj[[assay_use]])) { + sobj[[assay_use]]@meta.data <- cbind(sobj[[assay_use]]@meta.data, meta_genes) + } + else if ("meta.features" %in% slotNames(sobj[[assay_use]])){ + sobj[[assay_use]]@meta.features <- cbind(sobj[[assay_use]]@meta.features,meta_genes) + } + # dim reduction # note: Seurat requires assay name specification for each dim reduc avail_dr = list_dim_reductions(gobject = gobject, spat_unit = spat_unit, feat_type = assay_use,) + if (!is.null(avail_dr)){ if (nrow(avail_dr) > 0){ dr_use = avail_dr[, name] for (i in seq(nrow(avail_dr))){ @@ -1138,12 +1154,11 @@ giottoToSeurat <- function(gobject, } } } - - - + } # network objects # expression network avail_nn = list_nearest_networks(gobject = gobject, spat_unit = spat_unit, feat_type = assay_use) + if (!is.null(avail_nn)){ if (nrow(avail_nn) > 0){ for (i in seq(nrow(avail_nn))){ nn_name = avail_nn[i, name] @@ -1167,9 +1182,8 @@ giottoToSeurat <- function(gobject, sobj[[nn_name]] = sGraph } } - + } } - # spatial coordinates loc_use <- data.table::setDF( get_spatial_locations( @@ -1188,11 +1202,9 @@ giottoToSeurat <- function(gobject, sobj[['spatial']] <- Seurat::CreateDimReducObject(embeddings = as.matrix(loc_2), assay = names(sobj@assays)[1], key = 'spatial_') - - - # spatial network avail_sn = list_spatial_networks(gobject = gobject, spat_unit = spat_unit) + if (!is.null(avail_sn)){ if (nrow(avail_sn) > 0){ sn_all = avail_sn[, name] for (i in sn_all){ @@ -1211,121 +1223,265 @@ giottoToSeurat <- function(gobject, sobj[[nn_name]] <- spatGraph } } - + } return (sobj) } -#' @title seuratToGiotto_OLD -#' @name seuratToGiotto_OLD -#' @description Converts Seurat object into a Giotto object. Deprecated, see \code{\link{giottoToSeurat}} -#' @param obj_use Seurat object -#' @param ... additional params to pass -#' @return Giotto object +#' @title Convert Giotto to Seurat V5 +#' @name giottoToSeuratV5 +#' @description Converts Giotto object into a Seurat object. This functions extracts +#' specific sets of data belonging to specified spatial unit. +#' The default values are 'cell' and 'rna' respectively. +#' @param gobject Giotto object +#' @param obj_use Giotto object (deprecated, use gobject) +#' @param spat_unit spatial unit (e.g. 'cell') +#' @param ... additional params to pass to \code{\link{get_spatial_locations}} +#' @return Seurat object #' @export -seuratToGiotto_OLD <- function(obj_use = NULL, - ...){ +giottoToSeuratV5 <- function(gobject, + spat_unit = NULL, + obj_use = NULL, + ...){ + + #To use new Seurat v5 assays + options(Seurat.object.assay.version = 'v5') + + + # data.table vars + feat_type = name = dim_type = nn_type = NULL + + if(!is.null(obj_use)) { + warning('obj_use param is deprecated. 
Please use "gobject') + gobject = obj_use + } + + # set default spat_unit and feat_type to be extracted as a Seurat assay + spat_unit = set_default_spat_unit(gobject = gobject, + spat_unit = spat_unit) + + # verify if optional package is installed + package_check(pkg_name = "Seurat", repository = "CRAN") requireNamespace('Seurat') - requireNamespace('Giotto') - - # get general info in basic seurat structures - obj_assays <- names(obj_use@assays) - if ('Spatial' %in% obj_assays){ - obj_assays <- c('Spatial',obj_assays[-which(obj_assays == 'Spatial')]) - } - - obj_dimReduc <- names(obj_use@reductions) - obj_dimReduc_assay <- sapply(obj_dimReduc,function(x) - obj_use[[x]]@assay.used) - - obj_graph_expr <- names(obj_use@graphs) - obj_graph_expr_assay <- sapply(obj_graph_expr,function(x) - obj_use[[x]]@assay.used) - - obj_meta_cells <- obj_use@meta.data - obj_meta_genes <- lapply(obj_assays,function(x) - obj_use[[x]]@meta.features) - names(obj_meta_genes) <- obj_assays - - obj_img <- obj_use@images - obj_img_names <- names(obj_img) - loc_use <- lapply(obj_img_names,function(x){ - temp <- obj_img[[x]]@coordinates - temp <- as.data.frame(temp[,c('col','row')]) - # temp$region <- x - return (temp) - }) - loc_use <- Reduce(rbind,loc_use) - - # add assay data: raw, normalized & scaled - for (i in 1:length(obj_assays)){ - data_raw <- Seurat::GetAssayData(obj_use,slot = 'counts',assay = obj_assays[i]) - data_norm <- Seurat::GetAssayData(obj_use,slot = 'data',assay = obj_assays[i]) - data_scale <- Seurat::GetAssayData(obj_use,slot = 'scale.data',assay = obj_assays[i]) - - if (i == 1 & obj_assays[i] == 'Spatial'){ - feat_use <- 'rna' - test <- createGiottoObject(expression = obj_use[[obj_assays[i]]]@counts, - spatial_locs = loc_use, - expression_feat = 'rna') - test <- addCellMetadata(test,feat_type = feat_use,new_metadata = obj_meta_cells) + + # check whether any raw data exist -- required for Seurat + avail_expr = list_expression(gobject = gobject, spat_unit = spat_unit) + raw_exist = avail_expr[, 'raw' %in% name, by = feat_type] + # raw_exist <- sapply(gobject@expression_feat,function(x) + # 'raw' %in% names(gobject@expression[[x]])) + if (nrow(raw_exist) > 0){ + assays_all = raw_exist[, feat_type] + # assays_all <- names(raw_exist[1]) + # assays_all <- union(assays_all,gobject@expression_feat) + } else { + stop("Raw count data not found. 
Required for Seurat object.") + } + + # create Seurat object when at least one raw data is available + for (i in seq_along(assays_all)){ + assay_use <- assays_all[i] + expr_use = lapply(avail_expr[feat_type == assay_use, name], + function(x) { + get_expression_values(gobject = gobject, + spat_unit = spat_unit, + feat_type = assay_use, + values = x, + output = 'exprObj') + }) + # expr_use <- gobject@expression[[assay_use]] + names(expr_use) = unlist(lapply(expr_use, objName)) + slot_use <- names(expr_use) + if (i == 1){ + data_raw <- expr_use[['raw']][] + sobj <- Seurat::CreateSeuratObject(counts = data_raw, + assay = assay_use) + if ('normalized' %in% slot_use){ + sobj <- Seurat::SetAssayData(sobj,slot = 'data', + new.data = expr_use[['normalized']][], + assay = assay_use) + } + if ('scaled' %in% slot_use){ + sobj <- Seurat::SetAssayData(sobj,slot = 'scale.data', + # does not accept 'dgeMatrix' + new.data = as.matrix(expr_use[['scaled']][]), + assay = assay_use) + } } else { - feat_use <- obj_assays[i] - test@expression[[feat_use]][['raw']] <- data_raw - test@feat_ID[[feat_use]] = rownames(data_raw) - test@feat_metadata[[feat_use]] = data.table::data.table(feat_ID = test@feat_ID[[feat_use]]) + if ('raw' %in% slot_use){ + data_raw <- expr_use[['raw']][] + flag_raw <- 1 + } else { + flag_raw <- 0 + } + if ('normalized' %in% slot_use){ + data_norm <- expr_use[['normalized']][] + flag_norm <- 1 + } else { + flag_norm <- 0 + } + if (flag_raw == 1){ + assay_obj <- Seurat::CreateAssayObject(counts = data_raw) + } else if (flag_raw == 0 & flag_norm == 1){ + assay_obj <- Seurat::CreateAssayObject(data = data_norm) + } else { + stop(paste0('Raw and normalized data not found for assay ',assay_use)) + } + sobj[[assay_use]] <- assay_obj + if ('scaled' %in% slot_use){ + data_scale <- as.matrix(expr_use[['scaled']][]) + sobj <- Seurat::SetAssayData(sobj,slot = "scale.data", + new.data = data_scale, + assay = assay_use) + } } - if (nrow(data_norm) > 0){ - test@expression[[feat_use]][['normalized']] <- data_norm + + # add cell metadata + meta_cells <- data.table::setDF( + get_cell_metadata( + gobject = gobject, + spat_unit = spat_unit, + feat_type = assay_use, + output = 'data.table', + copy_obj = TRUE + ) + ) + rownames(meta_cells) <- meta_cells$cell_ID + meta_cells <- meta_cells[,-which(colnames(meta_cells) == 'cell_ID')] + if(ncol(meta_cells) > 0) { + colnames(meta_cells) <- paste0(assay_use,"_",colnames(meta_cells)) } - if (nrow(data_scale) > 0){ - test@expression[[feat_use]][['scaled']] <- data_scale + sobj <- Seurat::AddMetaData(sobj,metadata = meta_cells[Seurat::Cells(sobj),]) + + # add feature metadata + meta_genes <- data.table::setDF( + get_feature_metadata( + gobject = gobject, + spat_unit = spat_unit, + feat_type = assay_use, + output = 'data.table', + copy_obj = TRUE + ) + ) + rownames(meta_genes) <- meta_genes$feat_ID + sobj@assays$rna@meta.data<- meta_genes + + # dim reduction + # note: Seurat requires assay name specification for each dim reduc + avail_dr = list_dim_reductions(gobject = gobject, spat_unit = spat_unit, feat_type = assay_use,) + if (!is.null(avail_dr)){ + if (nrow(avail_dr) > 0){ + dr_use = avail_dr[, name] + for (i in seq(nrow(avail_dr))){ + dr_name = avail_dr[i, name] + dr_type = avail_dr[i, dim_type] + dr_obj <- get_dimReduction( + gobject = gobject, + output = 'dimObj', + spat_unit = spat_unit, + feat_type = assay_use, + reduction_method = dr_type, + name = dr_name + ) + emb_use <- dr_obj[][Seurat::Cells(sobj),] + if (sum(c('loadings','eigenvalues') %in% 
names(slot(dr_obj, 'misc'))) == 2){ + loadings_use <- slot(dr_obj, 'misc')$loadings + stdev_use <- slot(dr_obj, 'misc')$eigenvalues + sobj[[dr_name]] <- Seurat::CreateDimReducObject(embeddings = as.matrix(emb_use), + loadings = loadings_use, + key = paste0(dr_name, '_'), + stdev = stdev_use, + assay = assay_use) + } else { + sobj[[dr_name]] <- Seurat::CreateDimReducObject(embeddings = as.matrix(emb_use), + key = paste0(dr_name, '_'), + assay = assay_use) + } + } } - - # gene metadata - if (length(obj_meta_genes[[i]]) > 0){ - test <- addFeatMetadata(test,feat_type = feat_use, - new_metadata = obj_meta_genes[[i]]) + } + + + + # network objects + # expression network + avail_nn = list_nearest_networks(gobject = gobject, spat_unit = spat_unit, feat_type = assay_use) + if (!is.null(avail_nn)){ + if (nrow(avail_nn) > 0) { + for (i in seq(nrow(avail_nn))){ + nn_name = avail_nn[i, name] + nn_type = avail_nn[i, nn_type] + nn_use <- get_NearestNetwork( + gobject = gobject, + spat_unit = spat_unit, + feat_type = assay_use, + nn_network_to_use = nn_type, + network_name = nn_name, + output = 'data.table' + ) + idx1 <- match(nn_use$from,Seurat::Cells(sobj)) + idx2 <- match(nn_use$to,Seurat::Cells(sobj)) + edge_weight <- nn_use$weight + nn_mtx <- Matrix::sparseMatrix(i = idx1,j = idx2,x = edge_weight,dims = c(ncol(sobj),ncol(sobj))) + rownames(nn_mtx) <- colnames(nn_mtx) <- Seurat::Cells(sobj) + nn_name <- paste0('expr_',nn_name) + sGraph <- Seurat::as.Graph(nn_mtx) + sGraph@assay.used = assay_use + sobj[[nn_name]] = sGraph + } } } - - # add dim reduction - for (i in obj_dimReduc){ - if (!i %in% c('pca','umap','tsne')){ - next - } else { - dimReduc_name <- i - dimReduc_method <- i - dimReduc_coords <- obj_use[[i]]@cell.embeddings - dimReduc_misc <- list(obj_use[[i]]@stdev, - obj_use[[i]]@feature.loadings, - obj_use[[i]]@feature.loadings.projected) - names(dimReduc_misc) <- c('eigenvalues','loadings','loadings_projected') - dimObject <- create_dim_obj(name = dimReduc_name, - reduction_method = dimReduc_method, - coordinates = dimReduc_coords, - misc = dimReduc_misc) - test@dimension_reduction[['cells']][[dimReduc_method]][[dimReduc_name]] <- dimObject + } + + # spatial coordinates + loc_use <- data.table::setDF( + get_spatial_locations( + gobject = gobject, + spat_unit = spat_unit, + output = 'data.table', + copy_obj = TRUE, + ... 
# allow setting of spat_loc_name through additional params + ) + ) + rownames(loc_use) <- loc_use$cell_ID + sobj <- Seurat::AddMetaData(sobj, metadata = loc_use) + # add spatial coordinates as new dim reduct object + loc_2 <- loc_use[,c('sdimx','sdimy')] + colnames(loc_2) <- c('spatial_1','spatial_2') + sobj[['spatial']] <- Seurat::CreateDimReducObject(embeddings = as.matrix(loc_2), + assay = names(sobj@assays)[1], + key = 'spatial_') + + + + # spatial network + avail_sn = list_spatial_networks(gobject = gobject, spat_unit = spat_unit) + if (!is.null(avail_nn)){ + if (nrow(avail_sn) > 0){ + sn_all = avail_sn[, name] + for (i in sn_all){ + snt_use <- get_spatialNetwork(gobject = gobject, + spat_unit = spat_unit, + name = i, + output = 'networkDT') + idx1 <- match(snt_use$from,Seurat::Cells(sobj)) + idx2 <- match(snt_use$to,Seurat::Cells(sobj)) + edge_weight <- snt_use$weight + nn_mtx <- Matrix::sparseMatrix(i = idx1,j = idx2,x = edge_weight,dims = c(ncol(sobj),ncol(sobj))) + rownames(nn_mtx) <- colnames(nn_mtx) <- Seurat::Cells(sobj) + nn_name <- paste0('spatial_',i) + spatGraph = Seurat::as.Graph(nn_mtx) + spatGraph@assay.used = names(sobj@assays)[1] + sobj[[nn_name]] <- spatGraph } } - - # add expr nearest neighbors - for (i in obj_graph_expr){ - mtx_use <- obj_use[[i]] - ig_use <- igraph::graph_from_adjacency_matrix(mtx_use,weighted = T) - g_type <- unlist(strsplit(i,split = "_"))[2] - g_val <- i - test@nn_network[[g_type]][[g_val]][['igraph']] <- ig_use } - return (test) - + + return (sobj) } - - - -#' Convert a Seurat object to a Giotto object -#' +#' +#' @title Convert a Seurat V4 object to a Giotto object +#' @name seuratToGiottoV4 #' @param sobject Input Seurat object to convert to Giotto object #' @param spatial_assay Specify name of the spatial assay slot in Seurat. Default is \code{"Spatial"}. #' @param dim_reduction Specify which dimensional reduction computations to fetch from @@ -1334,30 +1490,27 @@ seuratToGiotto_OLD <- function(obj_use = NULL, #' Default is \code{"Vizgen"}. #' @return A Giotto object converted from Seurat object with all computations stored in it. #' @export -seuratToGiotto = function(sobject, +seuratToGiottoV4 = function(sobject, spatial_assay = 'Spatial', dim_reduction = c('pca','umap'), subcellular_assay = 'Vizgen'){ package_check('Seurat') - + if(is.null(Seurat::GetAssayData(object = sobject, slot = "counts", assay = spatial_assay))) { wrap_msg('No raw expression values are provided in spatial_assay') return(sobject) - } else { - exp = Seurat::GetAssayData(object = sobject, slot = "counts", assay = spatial_assay) if(!is.null(sobject@assays$SCT)){ normexp = Seurat::GetAssayData(object = sobject, slot = "counts", assay = 'SCT') } - + if ("data" %in% slotNames(slot(sobject, 'assays')[[spatial_assay]])){ if(!is.null(slot(sobject, 'assays')[[spatial_assay]]@data)){ normexp = Seurat::GetAssayData(object = sobject, slot = "data", assay = spatial_assay) + } } - # Cell Metadata cell_metadata = sobject@meta.data - # Dimension Reduction if(sum(sapply(dim_reduction,function(x) length(sobject@reductions[[x]]))) == 0) { dimReduc_list = NULL @@ -1381,26 +1534,210 @@ seuratToGiotto = function(sobject, provenance = NULL, coordinates = dim_coord, misc = list(eigenvalues = dim_eig, loadings = dim_load)) - return(dimReduc) }) # names(dimReduc_list) <- dim_reduction } + # Spatial Locations + # if(length(sobject@assays[[spatial_assay]]) == 0) { + # spat_loc = NULL + # } else { + + # !requires image objects! 
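+    # note: tissue coordinates can only be read from an attached Seurat image
+    # object; when no image is present, placeholder zero coordinates are
+    # created further below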
+ if (!is.null(Seurat::Images(object = sobject, assay = spatial_assay))) { + spat_coord = Seurat::GetTissueCoordinates(sobject) + spat_coord = cbind(rownames(spat_coord), data.frame(spat_coord, row.names=NULL)) + colnames(spat_coord) = c("cell_ID", "sdimy", "sdimx") + spat_loc = spat_coord + } else { + message("Images for RNA assay not found in the data. Skipping image processing.") + # spat_loc = NULL + } + + # } + if (length(sobject@assays[[spatial_assay]]) == 0 || is.null(Seurat::Images(object = sobject, assay = spatial_assay))) { + spat_loc <- data.frame( + sdimx = rep(0, length(colnames(sobject))), + sdimy = rep(0, length(colnames(sobject))) + ) + } + + + # Subcellular + name = names(sobject@images) + # if(length(sobject@assays[[subcellular_assay]]) == 1) { + # spat_coord = Seurat::GetTissueCoordinates(sobject) + # colnames(spat_coord) = c("sdimx", "sdimy", "cell_ID") + # exp = exp[ , c(intersect(spat_coord$cell_ID, colnames(exp)))] + # spat_loc = spat_coord + # } + + # if (!length(sobject@images) == 0) { + # if ("molecules" %in% methods::slotNames(sobject@images[[name]]) == TRUE) { + # if(!length(sobject@images[[name]][["molecules"]]) == 0) { + # assay = names(sobject@assays) + # featnames = rownames(sobject@assays[[assay]]@meta.features) + # mol_spatlocs = data.table::data.table() + # for (x in featnames) { + # df = (Seurat::FetchData(sobject[[name]][["molecules"]], vars = x)) + # mol_spatlocs = rbind(mol_spatlocs, df) + # } + # gpoints = createGiottoPoints(mol_spatlocs, feat_type = "rna") + # } + # } + # } + } + + if (length(sobject@assays[[subcellular_assay]]) == 0) { + cat("No subcellular information found.\n") + } + else{ + cat("Please use function \"seuratToGiottoV5\" for handling subcellular information.\n") + } + if (length(sobject@images) == 0) { + cat("No images found\n") + } + else{ + cat("Please use function \"seuratToGiottoV5\" for handling image information.\n") + } + + gobject = createGiottoObject(exp, + spatial_locs = spat_loc, + dimension_reduction = dimReduc_list) + if (exists('normexp') == TRUE) { + exprObj = create_expr_obj(name = 'normalized', + exprMat = normexp, + spat_unit = 'cell', + feat_type = 'rna', + provenance = 'cell') + gobject = set_expression_values(gobject = gobject, values = exprObj, set_defaults = FALSE) + # gobject@expression$cell$rna$normalized = normexp + } + gobject = addCellMetadata(gobject = gobject, new_metadata = cell_metadata) + if (exists('gpoints') == TRUE) { + gobject = addGiottoPoints(gobject = gobject, + gpoints = list(gpoints)) + } + return (gobject) +} +#' @title Convert a Seurat V5 object to a Giotto object +#' @name seuratToGiottoV5 +#' @param sobject Input Seurat object to convert to Giotto object +#' @param spatial_assay Specify name of the spatial assay slot in Seurat. Default is \code{"Spatial"}. +#' @param dim_reduction Specify which dimensional reduction computations to fetch from +#' input Seurat object. Default is \code{"c('pca', 'umap')"}. +#' @param subcellular_assay Specify name of the subcellular assay in input object. +#' Default is \code{"Vizgen"}. +#' @return A Giotto object converted from Seurat object with all computations stored in it. 
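+#' @param spatial_loc name of a dimension reduction slot in the Seurat object
+#' that stores spatial coordinates. Default is \code{"spatial"}.
+#' @examples
+#' \dontrun{
+#' # assumes `so` is a Seurat v5 object with a "Spatial" assay
+#' g <- seuratToGiottoV5(sobject = so, spatial_assay = "Spatial")
+#' }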
+#' @export +seuratToGiottoV5 = function(sobject, + spatial_assay = 'Spatial', + dim_reduction = c('pca','umap'), + subcellular_assay = 'Vizgen', + spatial_loc = 'spatial'){ + package_check('Seurat') + if(is.null(Seurat::GetAssayData(object = sobject, slot = "counts", assay = spatial_assay))) { + wrap_msg('No raw expression values are provided in spatial_assay') + return(sobject) + + } else { + + exp = Seurat::GetAssayData(object = sobject, slot = "counts", assay = spatial_assay) + if(!is.null(sobject@assays$SCT)){ + normexp = Seurat::GetAssayData(object = sobject, slot = "counts", assay = 'SCT') + } + + if("data" %in% slotNames(sobject@assays[[spatial_assay]])){ + if(!is.null(slot(sobject, 'assays')[[spatial_assay]]@data)){ + normexp = Seurat::GetAssayData(object = sobject, slot = "data", assay = spatial_assay) + } + } + if("layers" %in% slotNames(sobject@assays[[spatial_assay]])){ + if(!is.null(slot(sobject, 'assays')[[spatial_assay]]@layers)){ + normexp = LayerData(object = sobject, assay = spatial_assay) + } + } + + # Cell Metadata + cell_metadata = sobject@meta.data + # Feat Metadata + feat_metadata = sobject[[]] + + # Dimension Reduction + if (sum(sapply(dim_reduction, function(x) length(sobject@reductions[[x]]))) == 0) { + dimReduc_list = NULL + } else { + dimReduc_list = lapply(dim_reduction, function(x) { + dim_coord = as.matrix(Seurat::Embeddings(object = sobject, reduction = x)) + dim_load = as.matrix(Seurat::Loadings(object = sobject, reduction = x)) + dim_eig = Seurat::Stdev(sobject, reduction = x) + + if (length(dim_eig) > 0) { + dim_eig = sapply(dim_eig, function(y) y ^ 2) + } + + colnames(dim_coord) = paste0('Dim.', 1:ncol(dim_coord)) + + if (length(dim_load) > 0) { + colnames(dim_load) = paste0('Dim.', 1:ncol(dim_load)) + } + + dimReduc = create_dim_obj(name = x, + reduction = 'cells', + reduction_method = x, + spat_unit = 'cell', + feat_type = 'rna', + provenance = NULL, + coordinates = dim_coord, + misc = list(eigenvalues = dim_eig, loadings = dim_load)) + + return(dimReduc) + }) + # names(dimReduc_list) <- dim_reduction + # 'dimReduc_list' now contains the list of 'dimReduc' objects for each element in 'dim_reduction' + + } + # Spatial Locations if(length(sobject@assays[[spatial_assay]]) == 0) { spat_loc = NULL } else { - + # !requires image objects! 
- spat_coord = Seurat::GetTissueCoordinates(sobject) - spat_coord = cbind(rownames(spat_coord), data.frame(spat_coord, row.names=NULL)) - colnames(spat_coord) = c("cell_ID", "sdimy", "sdimx") - spat_loc = spat_coord + if (!is.null(Seurat::Images(object = sobject, assay = spatial_assay))) { + spat_coord = Seurat::GetTissueCoordinates(sobject) + # spat_coord = cbind(rownames(spat_coord), data.frame(spat_coord, row.names=NULL)) + + if(!("cell" %in% spat_coord)){ + spat_coord$cell_ID <- rownames(spat_coord) + colnames(spat_coord) = c("sdimx", "sdimy", "cell_ID" ) + } + else{ + colnames(spat_coord) = c("sdimx", "sdimy", "cell_ID") + } + + spat_loc = spat_coord + length_assay <-length(colnames(sobject)) + + spat_datatable <- data.table(cell_ID = character(length_assay), + sdimx = rep(NA_real_, length_assay), + sdimy = rep(NA_real_, length_assay)) + + spat_datatable$cell_ID <- colnames(sobject) + match_cell_ID <- match(spat_loc$cell_ID, spat_datatable$cell_ID) + matching_indices <- match_cell_ID + matching_indices <- matching_indices[!is.na(matching_indices)] + spat_datatable[matching_indices, c("sdimx", "sdimy") := list(spat_loc$sdimx, spat_loc$sdimy)] + spat_loc <- spat_datatable + + } else { + message("Images for RNA assay not found in the data. Skipping image processing.") + spat_loc = NULL + } + } - - - # Subcellular + #Subcellular name = names(sobject@images) if(length(sobject@assays[[subcellular_assay]]) == 1) { @@ -1410,24 +1747,108 @@ seuratToGiotto = function(sobject, spat_loc = spat_coord } if (!length(sobject@images) == 0) { - if ("molecules" %in% methods::slotNames(sobject@images[[name]]) == TRUE) { - if(!length(sobject@images[[name]][["molecules"]]) == 0) { + for (i in names(sobject@images)){ + if ("molecules" %in% names(sobject@images[[i]]) == TRUE) { + if(!length(sobject@images[[i]][["molecules"]]) == 0) { assay = names(sobject@assays) - featnames = rownames(sobject@assays[[assay]]@meta.features) + featnames = rownames(sobject) mol_spatlocs = data.table::data.table() for (x in featnames) { - df = (Seurat::FetchData(sobject[[name]][["molecules"]], vars = x)) + df = (Seurat::FetchData(sobject[[i]][["molecules"]], vars = x)) mol_spatlocs = rbind(mol_spatlocs, df) } gpoints = createGiottoPoints(mol_spatlocs, feat_type = "rna") + if("centroids" %in% names(sobject@images[[i]])){ + centroids_coords <- sobject@images[[i]]$centroids@coords + centroids_coords <- vect(centroids_coords) + gpolygon <- create_giotto_polygon_object(name = "cell", spatVector = centroids_coords) + } + if ("segmentation" %in% names(sobject@images[[i]])){ + + polygon_list <- list() + + for (j in seq(sobject@images[[i]]@boundaries$segmentation@polygons)) { + polygon_info <- sobject@images[[i]]@boundaries$segmentation@polygons[[j]] + + #Get coordinates from segmentation + seg_coords <- polygon_info@Polygons[[1]]@coords + + #Fetch cell_Id from polygon information + cell_ID <- polygon_info@ID + + #Convert it to SpatVector + seg_coords <- vect(seg_coords) + + #Create giotto_polygon_object + gpolygon <- create_giotto_polygon_object(name = "cell", spatVector = centroids_coords ,spatVectorCentroids = seg_coords) + + # Add the cell_ID to the list of polygon names + polygon_list[[cell_ID]] <- gpolygon } + } } + } } + } } + if(length(spatial_loc) != 0){ + if(spatial_loc %in% names(sobject@reductions)){ + cell_ID <- rownames(sobject@reductions[[spatial_loc]]) + spat_reduct <- cbind(sobject@reductions[[spatial_loc]]@cell.embeddings, cell_ID) + colnames(spat_reduct) <- c("sdimx", "sdimy", "cell_ID") + spat_loc <- spat_reduct + 
spat_loc <- as.data.frame(spat_loc) + length_assay <- length(colnames(sobject)) + spat_datatable <- data.table(cell_ID = character(length_assay), + sdimx = rep(NA_real_, length_assay), + sdimy = rep(NA_real_, length_assay)) + + spat_datatable$cell_ID <- colnames(sobject) + match_cell_ID <- match(spat_loc$cell_ID, spat_datatable$cell_ID) + matching_indices <- match_cell_ID + matching_indices <- matching_indices[!is.na(matching_indices)] + spat_datatable[matching_indices, c("sdimx", "sdimy") := list(as.numeric(spat_loc$sdimx),as.numeric(spat_loc$sdimy))] + spat_loc <- spat_datatable + } + else { + message("spatial_loc: '", spatial_loc, "' spatial locations were not found in the object") + } + } + # Find Seurat images, extract them, and convert them to Giotto largeImage objects + + for (i in names(sobject@images)){ + + # check if the image slot has an image in it + if("image" %in% slotNames(sobject@images[[i]])){ + if(!is.null(sobject@images[[i]]@image)){ + + # Extract the red (r), green (g), and blue (b) channels + r <- as.matrix(sobject@images[[i]]@image[,,1]) + g <- as.matrix(sobject@images[[i]]@image[,,2]) + b <- as.matrix(sobject@images[[i]]@image[,,3]) + + # Convert channels to rasters + r <- terra::rast(r) + g <- terra::rast(g) + b <- terra::rast(b) + + # Create Giotto LargeImage + gImg <- createGiottoLargeImage(terra::rast(list(r, g, b))) + } + } + } + + if (is.null(spat_loc)) { + spat_loc <- data.frame( + sdimx = rep(0, length(colnames(sobject))), + sdimy = rep(0, length(colnames(sobject))) + ) + } + gobject = createGiottoObject(exp, spatial_locs = spat_loc, dimension_reduction = dimReduc_list) @@ -1441,17 +1862,27 @@ seuratToGiotto = function(sobject, # gobject@expression$cell$rna$normalized = normexp } gobject = addCellMetadata(gobject = gobject, new_metadata = cell_metadata) - - + gobject = addFeatMetadata(gobject = gobject, new_metadata = feat_metadata) + + if (exists('gpoints') == TRUE) { gobject = addGiottoPoints(gobject = gobject, gpoints = list(gpoints)) } - + + if(exists('gpolygon') == TRUE){ + gobject = addGiottoPolygons(gobject = gobject, gpolygons = polygon_list) + } + + if(exists('gImg') == TRUE){ + gobject = addGiottoLargeImage(gobject = gobject, largeImages = list(gImg)) + } return (gobject) } + + ## SpatialExperiment object #### #' Utility function to convert a Giotto object to a SpatialExperiment object. @@ -1683,7 +2114,7 @@ giottoToSpatialExperiment <- function(giottoObj, verbose = TRUE){ #' all existing networks. #' @param sp_network Specify the name of the spatial network(s) in the input #' SpatialExperiment object. Default \code{NULL} will use all existing -#' networks. +#' networks. This can be a vector of multiple network names. #' @param verbose A boolean value specifying if progress messages should #' be displayed or not. Default \code{TRUE}.
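+#' @examples
+#' \dontrun{
+#' # assumes `spe` is a SpatialExperiment object with colPair networks
+#' # named "knn" and "delaunay"
+#' g <- spatialExperimentToGiotto(spe, sp_network = c("knn", "delaunay"))
+#' }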
#' @import data.table @@ -1731,6 +2162,9 @@ spatialExperimentToGiotto <- function(spe, # Phenotype Data pData <- SummarizedExperiment::colData(spe) + + # To handle cell_ID duplication issues + if ("cell_ID" %in% colnames(pData)) {pData$`cell_ID` <- NULL} if(nrow(pData) > 0){ if(verbose) message("Copying phenotype data") giottoObj <- addCellMetadata(gobject = giottoObj, new_metadata = as.data.table(pData)) @@ -1789,11 +2223,14 @@ spatialExperimentToGiotto <- function(spe, networks <- SingleCellExperiment::colPairs(spe) # Spatial Networks if(!is.null(sp_network)){ - if(sp_network %in% names(networks)){ + if(all(sp_network %in% names(networks))){ for(i in seq(sp_network)){ if(verbose) message("Copying spatial networks") + networkDT = as.data.table(networks[[sp_network[i]]]) + networkDT$to <-colnames(spe)[networkDT$to] + networkDT$from <- colnames(spe)[networkDT$from] spatNetObj <- create_spat_net_obj( - networkDT = as.data.table(networks[[sp_network[i]]])) + networkDT = networkDT) giottoObj <- set_spatialNetwork(gobject = giottoObj, spatial_network = spatNetObj, name = sp_network[i]) diff --git a/R/python_bento.R b/R/python_bento.R new file mode 100644 index 000000000..812ae5736 --- /dev/null +++ b/R/python_bento.R @@ -0,0 +1,42 @@ +#' @title Create bento adata object from gobject +#' @name createBentoAdata +#' @description Create bento adata object from gobject +#' @param gobject Giotto object +#' @param env_to_use Python environment within which bento is installed. +#' If it is not already installed, the user +#' will be prompted to install `bento-tools` +#' DEFAULT: "giotto_env" +#' @return bento_adata bento adata object +#' @export +createBentoAdata <- function(gobject = NULL, + env_to_use = "giotto_env"){ + if(!c("giotto") %in% class(gobject)) stop(wrap_txt("Please provide a valid Giotto Object.", errWidth=TRUE)) + # Transcripts + transcripts_df <- as.data.frame(sf::st_as_sf(gobject@feat_info$rna@spatVector)) + coordinates_df <- lapply(transcripts_df['geometry'], sf::st_coordinates)$geometry + t_df <- as.data.frame(cbind(coordinates_df, transcripts_df[c("feat_ID")])) + colnames(t_df) <- c('x','y','gene') + + # Cell shapes + # TODO: Add batch information based on? + cell_poly <- spatVector_to_dt(gobject@spatial_info$cell@spatVector) + cell_poly <- data.frame(cell_id = cell_poly$poly_ID, x = cell_poly$x, y = cell_poly$y, batch = 0L) + + # Nuclei shapes + # TODO: Add batch information based on? + nucleus_poly <- spatVector_to_dt(gobject@spatial_info$nucleus@spatVector) + nucleus_poly <- data.frame(cell_id = nucleus_poly$poly_ID, x = nucleus_poly$x, y = nucleus_poly$y, batch = 0L) + + # Install bento-tools / Check python environment for bento-tools + bento_installed = checkPythonPackage(github_package_url = "git+https://github.com/wwang-chcn/bento-tools.git", + env_to_use = env_to_use) + # Will crash downstream if installation unsuccessful/denied + # or if the package is not found. 
+ + # Create AnnData object + g2bento_path <- system.file("python","g2bento.py",package="Giotto") + reticulate::source_python(g2bento_path) + bento_adata <- create_AnnData(trainscripts=t_df, cell_shape=cell_poly, nucleus_shape=nucleus_poly) + + return(bento_adata) +} diff --git a/R/python_environment.R b/R/python_environment.R index 84a5a8af8..60522acc4 100644 --- a/R/python_environment.R +++ b/R/python_environment.R @@ -127,7 +127,7 @@ install_giotto_environment_specific = function(packages_to_install = c('pandas', py_lou = packages_to_install[grepl(pattern = 'python-louvain',x = packages_to_install)] - pip_packages = c("smfishhmrf", py_lou) + pip_packages = c("smfishhmrf", "session-info", py_lou) # python-louvain must be installed with pip, not with conda-forge packages_to_install = packages_to_install[!grepl(pattern = 'python-louvain',x = packages_to_install)] @@ -194,7 +194,6 @@ install_giotto_environment_specific = function(packages_to_install = c('pandas', python_version = python_version) } - } @@ -489,3 +488,227 @@ set_giotto_python_path = function(python_path = NULL, return(python_path) } +#' @title Prompt User for Python Install +#' @name py_install_prompt +#' @param package python package/github url +#' @param env environment into which package will be installed +#' @description prompts user to install a package +#' @keywords internal +py_install_prompt <- function(package = NULL, + env = NULL){ + if(is.null(package) | is.null(env)) { + stop(wrap_txt("Incorrect Usage.\n", errWidth = TRUE)) + } + + install_py_pkg_msg = paste0("Python package `", + package, + "` is required and not installed.\n") + warning(install_py_pkg_msg, immediate. = TRUE) + install_py_pkg_msg = paste0("Enter 0 to skip installation and quit.\n") + install_py_pkg_msg = paste0(install_py_pkg_msg, "Enter any other number ", + "to install\n`", + package, + "`\nto python environment: `", + env, '`\n\n') + resp = as.integer(readline(prompt = install_py_pkg_msg)) + return(resp) + +} + +#' @title Install Package from GitHub Link +#' @name install_github_link_pip +#' @param link link to github repository containing a python package, +#' e.g. `git+https://github.com/TencentAILabHealthcare/pysodb.git` +#' @param env conda environment to which `link` will be installed via pip +#' @description +#' Installs `link` to python `env` +#' @keywords internal +install_github_link_pip <- function(link = NULL, + env = NULL){ + # Guard + if (is.null(link) | is.null(env)) stop(wrap_txt("Incorrect Usage.", errWidth = TRUE)) + + config <- reticulate::py_discover_config(use_environment=env) + # system commands return 0 if ran successfully, 1 otherwise + successful_install = !as.logical(system2(config$python, + c("-m", + "pip", + "install", + link))) + if (successful_install) { + return(TRUE) + } else { + git_url_err_msg = "Provided GitHub URL `" + git_url_err_msg = paste0(git_url_err_msg, link, "`") + git_url_err_msg = paste0(git_url_err_msg, " could not be installed.\n") + git_url_err_msg = paste0(git_url_err_msg, + "Please try again with a different URL.") + stop(GiottoUtils::wrap_txt(git_url_err_msg, errWidth = TRUE)) + } +} + +#' @title Install Python Package with Reticulate +#' @name install_py_pkg_reticulate +#' @param package name of python package +#' @param env name of the environment into which the python +#' package should be installed. +#' @details +#' Installs `package` to python `env` after prompting user. +#' Installation is done via `py_install` from the +#' `reticulate` package.
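+#' @return TRUE if the installation succeeds, FALSE if it fails; declining
+#' the installation prompt raises an error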
+#' @keywords internal +install_py_pkg_reticulate <- function(package = NULL, + env = NULL){ + resp = py_install_prompt(package = package, + env = env) + if(resp != 0){ + try_install = tryCatch(expr = { + reticulate::py_install(package = package, + envname = env) + return(TRUE) + + }, error = function(e){ + return(FALSE) + }) + + if (!try_install){ + cannot_install_msg = paste0("Could not install `",package) + cannot_install_msg = paste0(cannot_install_msg, "` using `reticulate::py_install()`\n") + + GiottoUtils::wrap_msg(cannot_install_msg, + errWidth = TRUE) + return(FALSE) + } + } else { + stop(GiottoUtils::wrap_txt("Package not installed.\n", errWidth = TRUE)) + } +} + +#' @title Check Python Package Installation +#' @name checkPythonPackage +#' @param package_name name of python package. See details. +#' @param github_package_url URL linking to github repository containing +#' a python package that may be installed with pip, +#' e.g. `git+https://github.com/TencentAILabHealthcare/pysodb.git`; +#' see details. +#' @param env_to_use name of the environment into which the python +#' package should be installed. +#' @description checks python environment for a +#' provided package, installs if it is not found. +#' @details +#' Parameter `github_package_url` takes precedence over +#' `package_name`, i.e. if both are provided, only the github +#' URL will be installed. Provide only one of these two +#' parameters, not both. +#' +#' @export +checkPythonPackage <- function(package_name = NULL, + github_package_url = NULL, + env_to_use = "giotto_env"){ + # Guard clauses + if (is.null(package_name) & is.null(github_package_url)){ + null_input_err_msg = "A python package name must be provided, e.g. `scanpy==1.9.0`" + null_input_err_msg = paste0(null_input_err_msg, + "\nAlternatively, provide a github package URL, ") + null_input_err_msg = paste0(null_input_err_msg, + "e.g.
`git+https://github.com/TencentAILabHealthcare/pysodb.git` ") + stop(GiottoUtils::wrap_txt(null_input_err_msg, + errWidth = TRUE)) + } + # Find path to currently initialized python env + path_to_env = reticulate::py_config()$pythonhome + + if (!grepl(env_to_use, path_to_env)){ + env_err_msg = paste0("Provided python environment `", + env_to_use,"` is not initialized.") + env_err_msg = paste0(env_err_msg, + "\nThe following python environment is in use: `", + path_to_env,"`") + env_err_msg = paste0(env_err_msg, + "\nTo initialize `",env_to_use,"`, you must ", + "restart your R session.") + + stop(GiottoUtils::wrap_txt(env_err_msg, + errWidth = TRUE)) + } + + env_str_location = stringr::str_locate(path_to_env, env_to_use)[2] + # Change env_to_use from name of environment + # to the full environment path + env_to_use = substr(path_to_env, 1, env_str_location) + + # If a github link is provided, install it and exit + if (!is.null(github_package_url)){ + resp = py_install_prompt(package = github_package_url, + env = env_to_use) + if (resp != 0){ + install_status = install_github_link_pip(link = github_package_url, + env = env_to_use) + return(install_status) + } else { + stop(GiottoUtils::wrap_txt("Package not installed.\n", errWidth = TRUE)) + } + } + + package_config = reticulate::py_list_packages() + pkgs_in_py_env = package_config$package + versions = package_config$version + + # package installed, right version --> exit + # package installed, but wrong version --> prompt for install + # package not installed --> prompt for install + + version_number = NULL + + contains_version_number = grepl("==", package_name) + if (contains_version_number){ + split_package_version = stringr::str_split(package_name, + pattern = "==") + package_name = split_package_version[[1]][1] + version_number = split_package_version[[1]][2] + } + + if (package_name %in% pkgs_in_py_env){ + if (!contains_version_number){ + # If a version number is not provided, + # and the package exists within the + # reticulate package list, exit, + # since it is already installed. + return(TRUE) + + } else { + # Check that the version numbers match, if provided + idx = which(pkgs_in_py_env == package_name) + version_match = (version_number == versions[idx]) + + if (version_match){ + # if the versions match, the right version + # is installed + return(TRUE) + } else{ + # Otherwise, install the provided version + inst_result = install_py_pkg_reticulate(package = paste0(package_name, + "==", + version_number), + env = env_to_use) + return(inst_result) + } + } + } else { + if (!contains_version_number) { + # If it is not installed, and has no version + # number, install it. 
+ inst_result = install_py_pkg_reticulate(package = package_name, + env = env_to_use) + } else { + # If it is not installed, and has a version + # number, concatenate the package and version + # strings, and install + inst_result = install_py_pkg_reticulate(package = paste0(package_name, + "==", + version_number), + env = env_to_use) + } + return(inst_result) + } +} diff --git a/R/spatial_genes.R b/R/spatial_genes.R index d9d43daa5..991c95f7a 100644 --- a/R/spatial_genes.R +++ b/R/spatial_genes.R @@ -1716,8 +1716,9 @@ silhouetteRankTest = function(gobject, if ('eva' %in% rownames(installed.packages()) == FALSE) { stop("\n package ", 'eva', " is not yet installed \n", "To install: \n", - "install.packages('eva_0.2.5.tar.gz', repos=NULL, type='source')", - "see https://cran.r-project.org/src/contrib/Archive/eva/") + "install.packages('eva')") + # "install.packages('eva_0.2.5.tar.gz', repos=NULL, type='source')", + # "see https://cran.r-project.org/src/contrib/Archive/eva/") } ## test if python package is installed @@ -1746,7 +1747,8 @@ silhouetteRankTest = function(gobject, # expression values values = match.arg(expression_values, c('normalized', 'scaled', 'custom')) expr_values = get_expression_values(gobject = gobject, - values = values) + values = values, + output = 'matrix') # subset genes if(!is.null(subset_genes)) { diff --git a/R/spatial_in_situ_visuals.R b/R/spatial_in_situ_visuals.R index aa95387a5..57615a1c9 100644 --- a/R/spatial_in_situ_visuals.R +++ b/R/spatial_in_situ_visuals.R @@ -478,6 +478,9 @@ spatInSituPlotPoints <- function(gobject, sel_feats = feats) } - spatial_feat_info = do.call('rbind', spatial_feat_info) + spatial_feat_info = rbindlist(spatial_feat_info, fill = TRUE) + + plot = plot_feature_points_layer(ggobject = plot, spatial_feat_info = spatial_feat_info, @@ -521,7 +523,10 @@ spatInSituPlotPoints <- function(gobject, feat_type = feat_type, include_poly_info = TRUE, poly_info = polygon_feat_type) - polygon_dt = polygon_combo[[feat_type]] + + #polygon_dt = polygon_combo[[feat_type]] + polygon_dt = rbindlist(polygon_combo, fill = TRUE) + data.table::setnames(polygon_dt, old = 'cell_ID', new = 'poly_ID') #polygon_info = get_polygon_info(gobject = gobject, @@ -568,7 +573,8 @@ spatInSituPlotPoints <- function(gobject, feat_type = feat_type, include_poly_info = TRUE, poly_info = polygon_feat_type) - polygon_dt = polygon_combo[[feat_type]] + #polygon_dt = polygon_combo[[feat_type]] + polygon_dt = rbindlist(polygon_combo, fill = TRUE) data.table::setnames(polygon_dt, old = 'cell_ID', new = 'poly_ID') #polygon_info = get_polygon_info(gobject = gobject, diff --git a/R/spdep.R b/R/spdep.R new file mode 100644 index 000000000..5b7466e91 --- /dev/null +++ b/R/spdep.R @@ -0,0 +1,194 @@ +#' Compute spatial autocorrelation using spdep +#' +#' @param gobject Input a Giotto object. +#' @param method Specify a method name to compute autocorrelation. +#' Available methods include \code{"geary.test", "lee.test", "lm.morantest","moran.test"}. +#' @param spat_unit spatial unit +#' @param feat_type feature type +#' @param expression_values expression values to use, default = normalized +#' @param spatial_network_to_use spatial network to use, default = spatial_network +#' @param verbose be verbose +#' +#' @return A data table with computed values for each feature.
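+#' @param return_gobject if TRUE, append the results to the feature metadata
+#' of the Giotto object and return the updated object; if FALSE (default),
+#' return a data.table of feat_ID and computed values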
+#' @export
+#' @import data.table
+#' @examples
+#' \dontrun{
+#' library(Giotto)
+#' library(GiottoData)
+#' library(spdep)
+#' g = loadGiottoMini(dataset = "visium")
+#' spdepAutoCorr(gobject = g, method = "moran.test")
+#' }
+spdepAutoCorr <- function(gobject,
+                          method = c("geary.test", "lee.test", "lm.morantest", "moran.test"),
+                          spat_unit = NULL,
+                          feat_type = NULL,
+                          expression_values = "normalized",
+                          spatial_network_to_use = "spatial_network",
+                          return_gobject = FALSE,
+                          verbose = FALSE) {
+
+    # Check and match the specified method argument
+    method <- match.arg(method)
+
+    # Check gobject and set spat_unit and feat_type
+    if(!is.null(gobject)) {
+        spat_unit = set_default_spat_unit(gobject = gobject,
+                                          spat_unit = spat_unit)
+        feat_type = set_default_feat_type(gobject = gobject,
+                                          spat_unit = spat_unit,
+                                          feat_type = feat_type)
+    } else {
+        stop('gobject has not been provided\n')
+    }
+
+    # Evaluate spatial autocorrelation input using Giotto
+    resultSpdepCor <- evaluate_autocor_input(gobject = gobject,
+                                             use_ext_vals = FALSE,
+                                             use_sn = TRUE,
+                                             use_expr = TRUE,
+                                             use_meta = FALSE,
+                                             spat_unit = spat_unit,
+                                             feat_type = feat_type,
+                                             feats = NULL,
+                                             method = "moran",
+                                             data_to_use = "expression",
+                                             expression_values = expression_values,
+                                             meta_cols = NULL,
+                                             spatial_network_to_use = spatial_network_to_use,
+                                             wm_method = "distance",
+                                             wm_name = "spat_weights",
+                                             node_values = NULL,
+                                             weight_matrix = NULL,
+                                             verbose = verbose)
+
+    # Extract feats and weight_matrix from the result
+    feat <- resultSpdepCor$feats
+    weight_matrix <- resultSpdepCor$weight_matrix
+    use_values <- resultSpdepCor$use_values
+
+    # progressr setup
+    nfeats = length(feat)
+    step_size = ceiling(nfeats/10L)
+
+    result_list <- list()
+    progressr::with_progress({
+        if(step_size > 1) pb = progressr::progressor(steps = nfeats/step_size)
+        result_list <- lapply_flex(seq_along(feat),
+                                   function(feat_value) {
+            callSpdepVar <- callSpdep(method = method,
+                                      x = use_values[, feat_value],
+                                      listw = spdep::mat2listw(weight_matrix, style = "W"))
+            # Extract the estimated value from the result
+            result_value <- callSpdepVar$estimate[1]
+            temp_dt <- data.table(feat_ID = feat[feat_value], value = result_value)
+            # increment progress
+            if(exists('pb')) if(feat_value %% step_size == 0) pb()
+            return(temp_dt)
+        })
+    })
+    result_dt <- rbindlist(result_list)
+
+    # Return the resulting data.table, or append it to feature metadata
+    if(isTRUE(return_gobject)) {
+        if(isTRUE(verbose)) wrap_msg('Appending', method,
+                                     'results to feature metadata: fDataDT()')
+        gobject = addFeatMetadata(gobject = gobject,
+                                  spat_unit = spat_unit,
+                                  feat_type = feat_type,
+                                  new_metadata = result_dt,
+                                  by_column = TRUE,
+                                  column_feat_ID = 'feat_ID')
+        return(gobject)
+    } else {
+        return(result_dt)
+    }
+
+}
+
+
+#' Call the spdep function with required parameters
+#'
+#' @param method Specify method name to call from spdep with its required
+#' parameters.
+#' @param ... Additional parameters for the function. See spdep documentation
+#' for relevant parameters.
+#' @return Computed statistics from the specified method.
+#' @export
+#' @seealso \pkg{\link{spdep}}
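+#' @examples
+#' \dontrun{
+#' # Illustrative sketch only (data and weights are made-up placeholders):
+#' # a plain spatial weights matrix may be supplied as `listw`; it is
+#' # converted internally with spdep::mat2listw().
+#' W <- matrix(0, 5, 5)
+#' W[cbind(1:4, 2:5)] <- 1
+#' W <- W + t(W)
+#' callSpdep(method = "moran.test", x = rnorm(5), listw = W)
+#' }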
+callSpdep <- function(method, ...) {
+
+    # Check that the 'spdep' package is installed; prompt installation if missing
+    package_check(pkg_name = "spdep", repository = "CRAN", optional = FALSE)
+
+    # Check if the 'method' argument is NULL; if so, stop with an error
+    if (is.null(method)) {
+        stop("The 'method' argument has not been provided. Please specify a valid method.")
+    }
+
+    # Check if 'method' exists in the 'spdep' package; if not, stop with an error
+    if (!(method %in% getNamespaceExports("spdep"))) {
+        stop(paste("Invalid method name. Method", method,
+                   "is not available in the spdep package."))
+    }
+
+    # Fetch the arguments of the 'method' from 'spdep'
+    fun <- get(method, envir = loadNamespace('spdep'))
+    allArgs <- args(fun) |> as.list() |> names()
+
+    # Capture arguments provided by the user
+    methodparam <- list(...)
+
+    # Check if the user provided the listw argument
+    if ("listw" %in% names(methodparam)) {
+        listw_arg <- methodparam$listw
+
+        # Check if listw_arg is a matrix
+        if (is.matrix(listw_arg)) {
+            # Convert the matrix to a listw object
+            listw_arg <- spdep::mat2listw(listw_arg, style = "W")
+        } else if (!inherits(listw_arg, "listw")) {
+            stop("listw must be either a matrix or a listw object.")
+        }
+
+        # Update the listw argument in methodparam
+        methodparam$listw <- listw_arg
+    }
+
+    # Stop if none of the user-provided arguments are valid for 'method'
+    if (all(!(names(methodparam) %in% allArgs))) {
+        stop("Invalid or missing parameters.")
+    }
+
+    # Arguments that, when required by 'method', are supplied via
+    # 'spdep::spweights.constants()'
+    requiredArgs <- c("n", "n1", "n2", "n3", "nn", "S0", "S1", "S2")
+
+    # If the method needs any of these constants, compute them from 'listw'
+    if (any(requiredArgs %in% allArgs)) {
+        spW <- spdep::spweights.constants(listw = methodparam$listw)
+        # Combine user-provided arguments with the computed constants
+        combinedParams <- append(methodparam, spW)
+    } else {
+        combinedParams <- methodparam
+    }
+
+    # Keep only the parameters that 'method' actually accepts
+    commonParams <- intersect(names(combinedParams), allArgs)
+    combinedParams <- combinedParams[commonParams]
+
+    # Call the function with its parameters
+    do.call(fun, combinedParams)
+}
+
diff --git a/_pkgdown.yml b/_pkgdown.yml
new file mode 100644
index 000000000..d71acfb90
--- /dev/null
+++ b/_pkgdown.yml
@@ -0,0 +1,4 @@
+url: ~
+template:
+  bootstrap: 5
+
diff --git a/inst/python/ad2g.py b/inst/python/ad2g.py
index a246e0a3e..d64d96a8a 100644
--- a/inst/python/ad2g.py
+++ b/inst/python/ad2g.py
@@ -33,6 +33,7 @@ def extract_expression(adata = None):
     ad_guard(adata)
     # anndata stores expression as cells by feats
     # giotto stores expression as feats by cells
+    adata.X.sort_indices()
     expr = adata.X.transpose()
     return expr
 
@@ -292,7 +293,6 @@ def align_network_data(distances = None, weights = None):
     idx_dist_not_sparse = distances.nonzero()
     blank = [0 for i in range(len(idx_dist_not_sparse[0]))]
     df = pd.DataFrame({"distance":blank.copy(), "weight":blank.copy(), "from":blank.copy(), "to":blank.copy()})
-    t0 = perf_counter()
 
     d_nz = distances[idx_dist_not_sparse]
     d_nz = np.array(d_nz).reshape(len(d_nz.T),)
@@ -320,8 +320,6 @@ def align_network_data(distances = None, weights = None):
     #     to += 1
     #     df.iloc[x,:] = {"distance":dist, "weight":weig, "from":from_, "to":to}
 
-    t1 = perf_counter()
-    print("Network extraction time:",t1-t0)
     return df
 
 ## Spatial Network
diff --git a/inst/python/configuration/genv_current.yml b/inst/python/configuration/genv_current.yml
index 0a9bfcdda..a7e4c1502 100644
--- a/inst/python/configuration/genv_current.yml
+++ b/inst/python/configuration/genv_current.yml
@@ -8,6 +8,7 @@ dependencies:
   - pip
   - pip:
     - smfishhmrf
+    - git+https://github.com/wwang-chcn/bento-tools.git@giotto_install
     - pandas==1.5.1
     - networkx==2.8.8
     - python-igraph==0.10.2
diff --git a/inst/python/g2bento.py b/inst/python/g2bento.py
new file mode 100644
index 000000000..d4089ac43
--- /dev/null
+++ b/inst/python/g2bento.py
@@ -0,0 +1,92 @@
+import bento as bt
+import geopandas as gpd
+import pandas as pd
+from anndata import AnnData
+from shapely.geometry import MultiPolygon, Polygon
+
+from log import debug, warning, info
+
+
+def create_seg_df(vertices_df: pd.DataFrame, x: str = 'x', y: str = 'y', cell_id: str = 'cell_id') -> pd.DataFrame:
+    """
+    Create a dataframe with cell_id and geometry columns from a dataframe with vertices
+    :param vertices_df: a dataframe with columns: cell_id, x, y
+    :param x: the column name of x coordinates
+    :param y: the column name of y coordinates
+    :param cell_id: the column name of cell id
+    :return: a dataframe with cell_id and geometry columns
+    """
+    # --- create polygons ---
+    polygons = vertices_df.groupby(cell_id).apply(lambda group: Polygon(zip(group[x], group[y])))  # type: ignore
+    seg_df = gpd.GeoDataFrame(polygons, columns=['geometry'])
+    # --- correct invalid polygons ---
+    corrected_seg_df = seg_df.copy(deep=True)
+    for i in range(seg_df.shape[0]):
+        if not seg_df.iloc[i, 0].is_valid:  # type: ignore
+            corrected_seg_df.iloc[i, 0] = seg_df.iloc[i, 0].buffer(0)  # type: ignore
+            if isinstance(corrected_seg_df.iloc[i, 0], MultiPolygon):
+                corrected_seg_df.iloc[i, 0] = max(corrected_seg_df.iloc[i, 0], key=lambda x: x.area)  # type: ignore
+    return corrected_seg_df
+
+
+def add_batch(adata: AnnData, cell_shape: pd.DataFrame):
+    """
+    Add batch information to an AnnData object.
+    If cell_shape has batch information, add it to adata; otherwise all batches are set to 0.
+    :param adata: an AnnData object
+    :param cell_shape: a dataframe of cell segmentation vertices, optionally with a batch column
+    :return: None; adata is modified in place
+    """
+    if 'batch' in cell_shape.columns:
+        info('Batch information found in cell_shape, adding batch information to adata')
+        adata.obs['batch'] = [cell_shape.loc[cell_shape['cell_id']==cell,'batch'].values[0] for cell in adata.obs_names]  # type: ignore
+        adata.uns['points']['batch'] = [cell_shape.loc[cell_shape['cell_id']==cell,'batch'].values[0] for cell in adata.uns['points']['cell']]  # type: ignore
+    else:  # Interim measure: batch information may not be transferred to cell_shape
+        warning('Batch information not found in cell_shape, all batches will be set to 0')
+        adata.obs['batch'] = 0
+        adata.uns['points']['batch'] = 0
+    adata.obs['batch'] = adata.obs['batch'].astype('category')
+    adata.uns['points']['batch'] = adata.uns['points']['batch'].astype('category')
+
+
+def create_AnnData(trainscripts, cell_shape, nucleus_shape) -> AnnData:
+    # --- processing input ---
+    # NOTE: 'trainscripts' is the transcript (molecule) point table
+    trainscripts = pd.DataFrame(trainscripts)
+    cell_shape = pd.DataFrame(cell_shape)
+    cell_shape['cell_id'] = cell_shape['cell_id'].astype('category')
+    if 'batch' in cell_shape.columns:
+        cell_shape['batch'] = cell_shape['batch'].astype('category')
+    nucleus_shape = pd.DataFrame(nucleus_shape)
+    nucleus_shape['cell_id'] = nucleus_shape['cell_id'].astype('category')
+
+    # --- create shape ---
+    cell_seg = create_seg_df(cell_shape, x='x', y='y', cell_id='cell_id')
+    nucleus_seg = create_seg_df(nucleus_shape, x='x', y='y', cell_id='cell_id')
+    if cell_seg.shape[0] > 500:
+        warning('cell_seg has more than 500 cells, processing may take a long time.')
+
+    # --- filter cells ---
+    # Let Giotto perform the filtering
+    legal_cells = pd.Series([True] * cell_seg.shape[0])
+
+    # --- create AnnData ---
+    adata: AnnData = bt.io.prepare(molecules=trainscripts, cell_seg=cell_seg, other_seg={'nucleus': nucleus_seg})  # type: ignore
+    add_batch(adata, cell_shape)
+
+    # --- filter genes ---
+    # Interim measure:
+    # subsetting methods (both AnnData and Giotto) may preserve genes that are not in the subsetted adata
+    legal_genes = adata.var_names.isin(set(adata.uns['points']['gene'].values))
+    filtered_adata = adata[legal_cells, legal_genes]  # type: ignore
+    filtered_adata.uns['points']['gene'] = filtered_adata.uns['points']['gene'].cat.remove_unused_categories()
+
+    # Interim measure:
+    # a subsetted adata (_is_view == True) doesn't have the _X property, which causes an unexpected error in adata.__sizeof__;
+    # when creating the object in R, reticulate calls sys.getsizeof to get the size of the object, which calls adata.__sizeof__
+    # https://github.com/rstudio/reticulate/issues/1332
+    # https://github.com/rstudio/rstudio/issues/13491
+    # reported to AnnData here: https://github.com/scverse/anndata/issues/1127
+    filtered_adata._X = filtered_adata.X
+    return filtered_adata
diff --git a/inst/python/log.py b/inst/python/log.py
new file mode 100644
index 000000000..e6c3f2d1c
--- /dev/null
+++ b/inst/python/log.py
@@ -0,0 +1,41 @@
+import sys
+import time
+
+
+def get_current_time() -> str:
+    return time.strftime('%H:%M:%S', time.localtime())
+
+
+def write_direct_message(message: str):
+    curr_time_str = get_current_time()
+    sys.stdout.write(f'{curr_time_str} --- {message}\n')
+    sys.stdout.flush()
+
+
+def debug(message: str):
+    write_direct_message(f'DEBUG: {message}')
+
+
+def info(message: str):
+    write_direct_message(f'INFO: {message}')
+
+
+def write_direct_message_err(message: str):
+    curr_time_str = get_current_time()
+    sys.stderr.write(f'{curr_time_str} --- {message}\n')
+    sys.stderr.flush()
+
+
+def warning(message: str):
+    write_direct_message_err(f'WARNING: {message}')
+
+
+def error(message: str):
+    write_direct_message_err(f'ERROR: {message}')
+
+
+def critical(message: str):
+    write_direct_message_err(f'CRITICAL: {message}')
+
+
+__all__ = ['debug', 'info', 'warning', 'error', 'critical']
diff --git a/inst/python/python_bento_analysis.py b/inst/python/python_bento_analysis.py
new file mode 100644
index 000000000..e96b167f3
--- /dev/null
+++ b/inst/python/python_bento_analysis.py
@@ -0,0 +1,410 @@
+from typing import List, Optional, Union, Iterable
+import bento as bt
+from anndata import AnnData
+import emoji
+from bento._utils import track
+from bento.tools._colocation import _colocation_tensor
+from bento.tools import decompose
+from kneed import KneeLocator
+from sklearn.utils import resample
+from minisom import MiniSom
+from tqdm.auto import tqdm
+import numpy as np
+from scipy.sparse import csr_matrix
+# explicit submodule imports (rasterio.features and shapely.geometry are used below)
+import rasterio
+import rasterio.features
+import shapely
+import shapely.geometry
+import geopandas as gpd
+import pandas as pd
+from bento.geometry import sindex_points
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+from log import warning, info
+
+
+# ---------------------------------
+# modified bento and dependencies functions/classes
+# ---------------------------------
+@track
+def colocation(
+    data: AnnData,
+    ranks: List[int],
+    fname: str,
+    iterations: int = 3,
+    plot_error: bool = True,
+    copy: bool = False,
+):
+    """Decompose a tensor of pairwise colocalization quotients into signatures.
+
+    Parameters
+    ----------
+    data : AnnData
+        Spatial formatted AnnData object.
+    ranks : list
+        List of ranks to decompose the tensor.
+    fname : str
+        Path to save the decomposition error plot.
+    iterations : int
+        Number of iterations to run the decomposition.
+    plot_error : bool
+        Whether to plot the error of the decomposition.
+    copy : bool
+        Whether to return a copy of the AnnData object. Default False.
+
+    Returns
+    -------
+    adata : AnnData
+        .uns['factors']: Decomposed tensor factors.
+        .uns['factors_error']: Decomposition error.
+    """
+    adata = data.copy() if copy else data
+
+    print("Preparing tensor...")
+    _colocation_tensor(adata, copy=copy)
+
+    tensor = adata.uns["tensor"]
+
+    print(emoji.emojize(":running: Decomposing tensor..."))
+    factors, errors = decompose(tensor, ranks, iterations=iterations)
+
+    if plot_error and errors.shape[0] > 1:
+        kl = KneeLocator(errors["rank"], errors["rmse"], direction="decreasing", curve="convex")
+        if kl.knee is None:
+            warning(f'No knee found, please extend the ranks range.\nCurrent ranks range: [{ranks[0]},{ranks[-1]}]')
+        else:
+            info(f'Knee found at rank {kl.knee}')
+            fig, ax = plt.subplots(figsize=(6, 4))
+            sns.lineplot(data=errors, x="rank", y="rmse", ci=95, marker="o", ax=ax)  # type: ignore
+            plt.axvline(kl.knee, linestyle="--")
+            plt.savefig(fname)
+            plt.close(fig)
+            info(f"Saved to {fname}")
+
+    adata.uns["factors"] = factors
+    adata.uns["factors_error"] = errors
+
+    print(emoji.emojize(":heavy_check_mark: Done."))
+    return adata if copy else None
+
+
+@track
+def fluxmap(
+    data: AnnData,
+    fname: str,
+    n_clusters: Union[Iterable[int], int] = range(2, 9),
+    num_iterations: int = 1000,
+    train_size: float = 0.2,
+    res: float = 0.1,
+    random_state: int = 11,
+    plot_error: bool = True,
+    copy: bool = False,
+):
+    """Cluster flux embeddings using self-organizing maps (SOMs) and vectorize clusters as Polygon shapes.
+
+    Parameters
+    ----------
+    data : AnnData
+        Spatial formatted AnnData object.
+    fname : str
+        Path to save the quantization error (elbow) plot.
+    n_clusters : int or list
+        Number of clusters to use. If list, will pick best number of clusters
+        using the elbow heuristic evaluated on the quantization error.
+    num_iterations : int
+        Number of iterations to use for SOM training.
+    train_size : float
+        Fraction of cells to use for SOM training. Default 0.2.
+    res : float
+        Resolution used for rendering embedding. Default 0.1.
+    random_state : int
+        Random state to use for SOM training. Default 11.
+    plot_error : bool
+        Whether to plot quantization error. Default True.
+    copy : bool
+        Whether to return a copy of the AnnData object. Default False.
+
+    Returns
+    -------
+    adata : AnnData
+        .uns["cell_raster"] : DataFrame
+            Adds "fluxmap" column denoting cluster membership.
+        .uns["points"] : DataFrame
+            Adds "fluxmap#" columns for each cluster.
+        .obs : GeoSeries
+            Adds "fluxmap#_shape" columns for each cluster rendered as (Multi)Polygon shapes.
+    """
+    adata = data.copy() if copy else data
+
+    # Check if flux embedding has been computed
+    if "flux_embed" not in adata.uns:
+        raise ValueError("Flux embedding has not been computed. Run `bento.tl.flux()` first.")
+
+    flux_embed = adata.uns["flux_embed"]
+    raster_points = adata.uns["cell_raster"]
+
+    if isinstance(n_clusters, int):
+        n_clusters = [n_clusters]
+
+    if isinstance(n_clusters, range):
+        n_clusters = list(n_clusters)
+
+    # Subsample flux embeddings for faster training
+    if train_size > 1:
+        raise ValueError("train_size must be less than 1.")
+    elif train_size == 1:
+        flux_train = flux_embed
+    else:
+        flux_train = resample(
+            flux_embed,
+            n_samples=int(train_size * flux_embed.shape[0]),
+            random_state=random_state,
+        )
+
+    # Perform SOM clustering over n_clusters range and pick best number of clusters using elbow heuristic
+    pbar = tqdm(total=4)
+    pbar.set_description(emoji.emojize("Optimizing # of clusters"))
+    som_models = {}
+    quantization_errors = []
+    for k in tqdm(n_clusters, leave=False):
+        som = MiniSom(1, k, flux_train.shape[1], random_seed=random_state)  # type: ignore
+        som.random_weights_init(flux_train)  # type: ignore
+        som.train(flux_train, num_iterations, random_order=False, verbose=False)  # type: ignore
+        som_models[k] = som
+        quantization_errors.append(som.quantization_error(flux_embed))
+
+    # Use kneed to find elbow
+    if len(n_clusters) > 1:  # type: ignore
+        kl = KneeLocator(n_clusters, quantization_errors, curve="convex", direction="decreasing")
+        if kl.elbow is None:
+            warning(
+                f'No elbow found, please extend the n_clusters range.\nCurrent n_clusters range: [{n_clusters[0]},{n_clusters[-1]}]'
+            )
+            return adata if copy else None
+        else:
+            info(f'Elbow found at {kl.elbow}')
+            best_k = kl.elbow
+            fig, ax = plt.subplots(figsize=(6, 4))
+            sns.lineplot(x=n_clusters, y=quantization_errors, ci=95, marker="o", ax=ax)  # type: ignore
+            plt.axvline(kl.elbow, linestyle="--")
+            plt.savefig(fname)
+            plt.close(fig)
+            info(f"Saved to {fname}")
+
+    else:
+        best_k = n_clusters[0]  # type: ignore
+    pbar.update()
+
+    # Use best k to assign each sample to a cluster
+    pbar.set_description(f"Assigning to {best_k} clusters")
+    som = som_models[best_k]
+    winner_coordinates = np.array([som.winner(x) for x in flux_embed]).T
+
+    # Indices start at 0, so add 1
+    qnt_index = np.ravel_multi_index(winner_coordinates, (1, best_k)) + 1  # type: ignore
+    raster_points["fluxmap"] = qnt_index
+    adata.uns["cell_raster"] = raster_points.copy()
+
+    pbar.update()
+
+    # Vectorize polygons in each cell
+    pbar.set_description(emoji.emojize("Vectorizing domains"))
+    cells = raster_points["cell"].unique().tolist()
+    # Scale down to render resolution
+    # raster_points[["x", "y"]] = raster_points[["x", "y"]] * res
+
+    # Cast to int
+    raster_points[["x", "y", "fluxmap"]] = raster_points[["x", "y", "fluxmap"]].astype(int)
+
+    rpoints_grouped = raster_points.groupby("cell")
+    fluxmap_df = dict()
+    for cell in tqdm(cells, leave=False):
+        rpoints = rpoints_grouped.get_group(cell)
+
+        # Fill in image at each point xy with fluxmap value by casting to dense matrix
+        image = (csr_matrix((
+            rpoints["fluxmap"],
+            (
+                (rpoints["y"] * res).astype(int),
+                (rpoints["x"] * res).astype(int),
+            ),
+        )).todense().astype("int16"))
+
+        # Find all the contours
+        contours = rasterio.features.shapes(image)  # type: ignore
+        polygons = np.array([(shapely.geometry.shape(p), v) for p, v in contours])  # type: ignore
+        shapes = gpd.GeoDataFrame(
+            polygons[:, 1],
+            geometry=gpd.GeoSeries(polygons[:, 0]).T,
+            columns=["fluxmap"],
+        )  # type: ignore
+
+        # Remove background shape
+        shapes["fluxmap"] = shapes["fluxmap"].astype(int)  # type: ignore
+        shapes = shapes[shapes["fluxmap"] != 0]
+
+        # Group same fields as MultiPolygons
+        shapes = shapes.dissolve("fluxmap")["geometry"]  # type: ignore
+
+        fluxmap_df[cell] = shapes
+
+    fluxmap_df = pd.DataFrame.from_dict(fluxmap_df).T
+    fluxmap_df.columns = "fluxmap" + fluxmap_df.columns.astype(str) + "_shape"
+
+    # Upscale to match original resolution
+    fluxmap_df = fluxmap_df.apply(lambda col: gpd.GeoSeries(col).scale(xfact=1 / res, yfact=1 / res, origin=(0, 0)))
+    pbar.update()
+
+    pbar.set_description("Saving")
+    old_cols = adata.obs.columns[adata.obs.columns.str.startswith("fluxmap")]
+    adata.obs = adata.obs.drop(old_cols, axis=1, errors="ignore")
+
+    adata.obs[fluxmap_df.columns] = fluxmap_df.reindex(adata.obs_names)
+
+    old_cols = adata.uns["points"].columns[adata.uns["points"].columns.str.startswith("fluxmap")]
+    adata.uns["points"] = adata.uns["points"].drop(old_cols, axis=1)
+
+    # TODO SLOW
+    sindex_points(adata, "points", fluxmap_df.columns.tolist())
+    pbar.update()
+    pbar.set_description("Done")
+    pbar.close()
+
+    return adata if copy else None
+
+
+# ---------------------------------
+# bento wrapper functions
+# ---------------------------------
+
+
+def analysis_shape_features(adata: AnnData, feature_names: Optional[List[str]] = None) -> None:
+    """
+    Examples
+    --------
+    >>> analysis_shape_features(adata=bento_adata)
+    """
+    if feature_names is None:
+        feature_names = list(bt.tl.list_shape_features().keys())
+    bt.tl.obs_stats(adata, feature_names=feature_names)
+
+
+def plot_shape_features_analysis_results(adata: AnnData, fname: str):
+    """
+    Examples
+    --------
+    >>> plot_shape_features_analysis_results(adata=bento_adata, fname='test_shape_features.pdf')
+    """
+    bt.pl.shapes(adata, fname=fname)
+
+
+def analysis_points_features(adata: AnnData,
+                             shapes_names: Optional[List[str]] = None,
+                             feature_names: Optional[List[str]] = None) -> None:
+    """
+    Examples
+    --------
+    >>> analysis_points_features(adata=bento_adata)
+    """
+    if shapes_names is None:
+        shapes_names = ["cell_shape", "nucleus_shape"]
+    if feature_names is None:
+        feature_names = list(bt.tl.list_point_features().keys())
+    bt.tl.analyze_points(adata, shape_names=shapes_names, feature_names=feature_names, groupby='gene')
+
+
+def plot_points_features_analysis_results(adata: AnnData, fname: str) -> None:
+    """
+    Examples
+    --------
+    >>> plot_points_features_analysis_results(adata=bento_adata, fname='test_points_features.pdf')
+    """
+    bt.pl.points(adata, fname=fname)
+
+
+def analysis_RNAflux(adata: AnnData) -> None:
+    """
+    Examples
+    --------
+    >>> analysis_RNAflux(adata=bento_adata)
+    """
+    bt.tl.flux(adata)
+
+
+def plot_RNAflux_analysis_results(adata: AnnData, fname: str) -> None:
+    """
+    Examples
+    --------
+    >>> plot_RNAflux_analysis_results(adata=bento_adata, fname='test_RNAflux.pdf')
+    """
+    bt.pl.flux(adata, fname=fname)
+
+
+def analysis_RNAfluxmap(adata: AnnData, fname: str, n_clusters: Union[Iterable[int], int] = range(2, 9)) -> None:
+    """
+    Examples
+    --------
+    >>> analysis_RNAfluxmap(adata=bento_adata, fname='test_RNAfluxmap_elbow_pos.png', n_clusters=range(1, 21))
+    """
+    if fname is None:
+        fname = 'fluxmap_elbow.pdf'
+    fluxmap(adata, fname, n_clusters=n_clusters)
+
+
+def plot_RNAfluxmap_analysis_results(adata: AnnData, fname: str) -> None:
+    """
+    Examples
+    --------
+    >>> plot_RNAfluxmap_analysis_results(adata=bento_adata, fname='test_RNAfluxmap.pdf')
+    """
+    bt.pl.fluxmap(adata, fname=fname)
+
+
+def analysis_rna_forest(adata: AnnData) -> None:
+    """
+    Examples
+    --------
+    >>> analysis_rna_forest(adata=bento_adata)
+    """
+    bt.tl.lp(adata)
+    bt.tl.lp_stats(adata)
+
+
+def plot_rna_forest_analysis_results(adata: AnnData, fname1: str, fname2: str) -> None:
+    """
+    Examples
+    --------
+    >>> plot_rna_forest_analysis_results(adata=bento_adata, fname1='test_rna_forest_radvis.pdf', fname2='test_rna_forest_upset.pdf')
+    """
+    bt.pl.lp_genes(adata, fname=fname1)
+    bt.pl.lp_dist(adata, fname=fname2)
+
+
+def analysis_colocalization(adata: AnnData, fname: str, ranks: Optional[List[int]] = None) -> None:
+    """
+    Examples
+    --------
+    >>> analysis_colocalization(adata=bento_adata, fname='test_colocalization_knee_pos.pdf')
+    """
+    if ranks is None:
+        ranks = list(range(1, 6))
+
+    # Cytoplasm = cell - nucleus
+    adata.obs["cytoplasm_shape"] = bt.geo.get_shape(adata, "cell_shape") - bt.geo.get_shape(adata, "nucleus_shape")
+
+    # Create point index
+    adata.uns["points"]["cytoplasm"] = (adata.uns["points"]["nucleus"].astype(int) < 0).astype(int)
+
+    bt.tl.coloc_quotient(adata, shapes=["cytoplasm_shape", "nucleus_shape"])
+
+    colocation(adata, ranks=ranks, fname=fname)
+
+
+def plot_colocalization_analysis_results(adata: AnnData, fname: str, rank: int) -> None:
+    """
+    Examples
+    --------
+    >>> plot_colocalization_analysis_results(adata=bento_adata, fname='test_colocalization.pdf', rank=2)
+    """
+    bt.pl.colocation(adata, rank=rank, fname=fname)
+
+
+def python_session_info():
+    import session_info
+    session_info.show(html=False, dependencies=True)
diff --git a/man/anndataToGiotto.Rd b/man/anndataToGiotto.Rd
index 095fadf02..f4e3625c3 100644
--- a/man/anndataToGiotto.Rd
+++ b/man/anndataToGiotto.Rd
@@ -11,7 +11,8 @@ anndataToGiotto(
   deluanay_spat_net = TRUE,
   spat_unit = NULL,
   feat_type = NULL,
-  python_path = NULL
+  python_path = NULL,
+  env_name = "giotto_env"
 )
 }
 \arguments{
@@ -40,6 +41,8 @@ Cannot be the same as n_key_added.}
 \item{feat_type}{desired feature type for conversion, default NULL}
 
 \item{python_path}{path to python executable within a conda/miniconda environment}
+
+\item{env_name}{name of environment containing python_path executable}
 }
 \value{
 Giotto object
diff --git a/man/calculateSpatCellMetadataProportions.Rd b/man/calculateSpatCellMetadataProportions.Rd
new file mode 100644
index 000000000..19f640029
--- /dev/null
+++ b/man/calculateSpatCellMetadataProportions.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/auxiliary_giotto.R
+\name{calculateSpatCellMetadataProportions}
+\alias{calculateSpatCellMetadataProportions}
+\title{calculateSpatCellMetadataProportions}
+\usage{
+calculateSpatCellMetadataProportions(
+  gobject,
+  spat_unit = NULL,
+  feat_type = NULL,
+  spat_network = NULL,
+  metadata_column = NULL,
+  name = "proportion",
+  return_gobject = TRUE
+)
+}
+\arguments{
+\item{gobject}{giotto object}
+
+\item{spat_unit}{spatial unit}
+
+\item{feat_type}{feature type}
+
+\item{spat_network}{spatial network}
+
+\item{metadata_column}{metadata column to use; an annotation column found in \code{pDataDT(gobject)}}
+
+\item{name}{descriptive name for the calculated proportions}
+
+\item{return_gobject}{return giotto object}
+}
+\value{
+giotto object (default) or enrichment object if return_gobject = FALSE
+}
+\description{
+calculates a proportion table for a cell metadata column (e.g. cluster labels)
+for all the spatial neighbors of a source cell. In other words, it calculates the
+niche composition for a given annotation for each cell.
+}
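+\examples{
+\dontrun{
+# Illustrative sketch, not part of the generated docs: assumes a Giotto
+# object `g` with a spatial network named "spatial_network" and a
+# "leiden_clus" column in pDataDT(g).
+g <- calculateSpatCellMetadataProportions(
+  gobject = g,
+  spat_network = "spatial_network",
+  metadata_column = "leiden_clus",
+  name = "proportion")
+}
+}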
+} diff --git a/man/callSpdep.Rd b/man/callSpdep.Rd new file mode 100644 index 000000000..59dc05a8f --- /dev/null +++ b/man/callSpdep.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spdep.R +\name{callSpdep} +\alias{callSpdep} +\title{Call the spdep function with required parameters} +\usage{ +callSpdep(method, ...) +} +\arguments{ +\item{method}{Specify method name to call from spdep with its required +parameters.} + +\item{...}{Additional parameters for the function. See spdep documentation +for relevant parameters.} +} +\value{ +Computed statistics from the specified method. +} +\description{ +Call the spdep function with required parameters +} +\seealso{ +\pkg{\link{spdep}} +} diff --git a/man/checkPythonPackage.Rd b/man/checkPythonPackage.Rd new file mode 100644 index 000000000..cb1044610 --- /dev/null +++ b/man/checkPythonPackage.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/python_environment.R +\name{checkPythonPackage} +\alias{checkPythonPackage} +\title{Check Python Package Installation} +\usage{ +checkPythonPackage( + package_name = NULL, + github_package_url = NULL, + env_to_use = "giotto_env" +) +} +\arguments{ +\item{package_name}{name of python package. See details.} + +\item{github_package_url}{URL linking to github repository containing +a python package that may be installed with pip, +e.g. `git+https://github.com/TencentAILabHealthcare/pysodb.git`; +see details.} + +\item{env_to_use}{name of the environment into which the python +package should be installed.} +} +\description{ +checks python environment for a +provided package, installs if it is not found. +} +\details{ +Parameter `github_package_url` takes precedent over +`package_name`, i.e. if both are provided, only the github +URL will be installed. This function should only be provided +one parameter, or the other. +} +\keyword{export} diff --git a/man/createBentoAdata.Rd b/man/createBentoAdata.Rd new file mode 100644 index 000000000..9b07c8d83 --- /dev/null +++ b/man/createBentoAdata.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/python_bento.R +\name{createBentoAdata} +\alias{createBentoAdata} +\title{Create bento adata object from gobject} +\usage{ +createBentoAdata(gobject = NULL, env_to_use = NULL) +} +\arguments{ +\item{gobject}{Giotto object} + +\item{env_to_use}{Python environment within which bento is installed. 
diff --git a/man/createBentoAdata.Rd b/man/createBentoAdata.Rd
new file mode 100644
index 000000000..9b07c8d83
--- /dev/null
+++ b/man/createBentoAdata.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/python_bento.R
+\name{createBentoAdata}
+\alias{createBentoAdata}
+\title{Create bento adata object from gobject}
+\usage{
+createBentoAdata(gobject = NULL, env_to_use = NULL)
+}
+\arguments{
+\item{gobject}{Giotto object}
+
+\item{env_to_use}{Python environment within which bento is installed.
+If it is not already installed, the user will be prompted to install
+`bento-tools`. DEFAULT: "giotto_env"}
+}
+\value{
+bento_adata, a bento AnnData object
+}
+\description{
+Create bento adata object from gobject
+}
diff --git a/man/doClusterProjection.Rd b/man/doClusterProjection.Rd
new file mode 100644
index 000000000..8bbd456b2
--- /dev/null
+++ b/man/doClusterProjection.Rd
@@ -0,0 +1,74 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clustering.R
+\name{doClusterProjection}
+\alias{doClusterProjection}
+\title{Projection of cluster labels}
+\usage{
+doClusterProjection(
+  target_gobject,
+  target_cluster_label_name = "knn_labels",
+  spat_unit = NULL,
+  feat_type = NULL,
+  source_gobject,
+  source_cluster_labels = NULL,
+  reduction = "cells",
+  reduction_method = "pca",
+  reduction_name = "pca",
+  dimensions_to_use = 1:10,
+  knn_k = 10,
+  prob = FALSE,
+  algorithm = c("kd_tree", "cover_tree", "brute"),
+  return_gobject = TRUE
+)
+}
+\arguments{
+\item{target_gobject}{target giotto object}
+
+\item{target_cluster_label_name}{name for predicted clusters}
+
+\item{spat_unit}{spatial unit}
+
+\item{feat_type}{feature type}
+
+\item{source_gobject}{source giotto object with annotation data}
+
+\item{source_cluster_labels}{annotation/labels to use to train KNN classifier}
+
+\item{reduction}{reduction on cells or features (default = cells)}
+
+\item{reduction_method}{shared reduction method (default = pca space)}
+
+\item{reduction_name}{name of shared reduction space (default name = 'pca')}
+
+\item{dimensions_to_use}{dimensions to use in shared reduction space (default = 1:10)}
+
+\item{knn_k}{number of k-neighbors to train a KNN classifier}
+
+\item{prob}{output probabilities together with label predictions}
+
+\item{algorithm}{nearest neighbor search algorithm}
+
+\item{return_gobject}{return giotto object}
+}
+\value{
+giotto object (default) or data.table with cell metadata
+}
+\description{
+Use a fast KNN classifier to predict labels from a smaller giotto object
+}
+\details{
+Function to train a KNN with \code{\link[FNN]{knn}}. The training data
+is obtained from the source giotto object (source_gobject) using existing annotations
+within the cell metadata. Cells without annotation/labels from the target giotto
+object (target_gobject) will receive predicted labels (and optional probabilities
+with prob = TRUE).
+
+**IMPORTANT** This projection assumes that you're using the same dimension reduction
+space (e.g. PCA) and number of dimensions (e.g. first 10 PCs) to train the KNN
+classifier as you used to create the initial annotations/labels in the source
+Giotto object.
+
+Altogether this is a convenience function that allows you to work with very big
+data, as you can predict cell labels on a smaller, subsetted Giotto object and then
+project the cell labels to the remaining cells in the target Giotto object.
+}
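+\examples{
+\dontrun{
+# Illustrative sketch (object names are placeholders): train a KNN on an
+# annotated subset and project its labels onto the full object. Both objects
+# are assumed to share the same PCA space and number of dimensions.
+big_gobject <- doClusterProjection(
+  target_gobject = big_gobject,
+  source_gobject = small_annotated_gobject,
+  source_cluster_labels = "leiden_clus",
+  reduction_method = "pca",
+  reduction_name = "pca",
+  dimensions_to_use = 1:10,
+  knn_k = 10)
+}
+}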
diff --git a/man/doGiottoClustree.Rd b/man/doGiottoClustree.Rd
index 4cd0f4a08..3f2e3bc4a 100644
--- a/man/doGiottoClustree.Rd
+++ b/man/doGiottoClustree.Rd
@@ -44,11 +44,11 @@ cluster cells using leiden methodology to visualize different resolutions
 }
 \details{
 This function tests different resolutions for Leiden clustering and provides a visualization
-of cluster sizing as resolution varies.
+of cluster sizing as resolution varies. By default, the tested leiden clusters are NOT saved
+to the Giotto object, and a plot is returned.
 
-If return_gobject is set to TRUE, and a giotto object with *all* tested leiden cluster information
+If return_gobject is set to TRUE, a giotto object with *all* tested leiden cluster information
 will be returned.
 }
 \seealso{
diff --git a/man/doLeidenClusterIgraph.Rd b/man/doLeidenClusterIgraph.Rd
new file mode 100644
index 000000000..58f0f7dbd
--- /dev/null
+++ b/man/doLeidenClusterIgraph.Rd
@@ -0,0 +1,71 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clustering.R
+\name{doLeidenClusterIgraph}
+\alias{doLeidenClusterIgraph}
+\title{doLeidenClusterIgraph}
+\usage{
+doLeidenClusterIgraph(
+  gobject,
+  spat_unit = NULL,
+  feat_type = NULL,
+  name = "leiden_clus",
+  nn_network_to_use = "sNN",
+  network_name = "sNN.pca",
+  objective_function = c("modularity", "CPM"),
+  weights = NULL,
+  resolution_parameter = 1,
+  beta = 0.01,
+  initial_membership = NULL,
+  n_iterations = 1000,
+  return_gobject = TRUE,
+  set_seed = TRUE,
+  seed_number = 1234,
+  ...
+)
+}
+\arguments{
+\item{gobject}{giotto object}
+
+\item{spat_unit}{spatial unit (e.g. "cell")}
+
+\item{feat_type}{feature type (e.g. "rna", "dna", "protein")}
+
+\item{name}{name for cluster, default to "leiden_clus"}
+
+\item{nn_network_to_use}{type of NN network to use (kNN vs sNN), default to "sNN"}
+
+\item{network_name}{name of NN network to use, default to "sNN.pca"}
+
+\item{objective_function}{objective function for the Leiden algorithm}
+
+\item{weights}{weights of edges}
+
+\item{resolution_parameter}{resolution, default = 1}
+
+\item{beta}{leiden randomness}
+
+\item{initial_membership}{initial membership of cells for the partition}
+
+\item{n_iterations}{number of iterations to run the Leiden algorithm.}
+
+\item{return_gobject}{boolean: return giotto object (default = TRUE)}
+
+\item{set_seed}{set seed}
+
+\item{seed_number}{number for seed}
+}
+\value{
+giotto object with new clusters appended to cell metadata
+}
+\description{
+cluster cells using a NN-network and the Leiden community
+detection algorithm as implemented in igraph
+}
+\details{
+This function is a wrapper for the Leiden algorithm implemented in igraph,
+which can detect communities in graphs of millions of nodes (cells),
+as long as they can fit in memory. See \code{\link[igraph]{cluster_leiden}} for more information.
+
+Set \emph{weights = NULL} to use the edge weights associated with the igraph network.
+Set \emph{weights = NA} if you do not want to use edge weights.
+}
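+\examples{
+\dontrun{
+# Illustrative sketch (assumes a Giotto object `g` with an existing shared
+# nearest neighbor network named "sNN.pca"):
+g <- doLeidenClusterIgraph(
+  gobject = g,
+  nn_network_to_use = "sNN",
+  network_name = "sNN.pca",
+  objective_function = "modularity",
+  resolution_parameter = 1,
+  name = "leiden_clus")
+}
+}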
diff --git a/man/giottoToAnnData.Rd b/man/giottoToAnnData.Rd
index 8393f3d98..929cd991c 100644
--- a/man/giottoToAnnData.Rd
+++ b/man/giottoToAnnData.Rd
@@ -9,6 +9,7 @@ giottoToAnnData(
   spat_unit = NULL,
   feat_type = NULL,
   python_path = NULL,
+  env_name = "giotto_env",
   save_directory = NULL
 )
 }
@@ -21,6 +22,8 @@ giottoToAnnData(
 
 \item{python_path}{path to python executable within a conda/miniconda environment}
 
+\item{env_name}{name of environment containing python_path executable}
+
 \item{save_directory}{directory in which the file will be saved.}
 }
 \value{
diff --git a/man/giottoToSeurat.Rd b/man/giottoToSeuratV4.Rd
similarity index 78%
rename from man/giottoToSeurat.Rd
rename to man/giottoToSeuratV4.Rd
index 16184cc2e..af48f777a 100644
--- a/man/giottoToSeurat.Rd
+++ b/man/giottoToSeuratV4.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/interoperability.R
-\name{giottoToSeurat}
-\alias{giottoToSeurat}
-\title{Convert Giotto to Seurat}
+\name{giottoToSeuratV4}
+\alias{giottoToSeuratV4}
+\title{Convert Giotto to Seurat V4}
 \usage{
-giottoToSeurat(gobject, spat_unit = NULL, obj_use = NULL, ...)
+giottoToSeuratV4(gobject, spat_unit = NULL, obj_use = NULL, ...)
 }
 \arguments{
 \item{gobject}{Giotto object}
diff --git a/man/giottoToSeuratV5.Rd b/man/giottoToSeuratV5.Rd
new file mode 100644
index 000000000..221947f85
--- /dev/null
+++ b/man/giottoToSeuratV5.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/interoperability.R
+\name{giottoToSeuratV5}
+\alias{giottoToSeuratV5}
+\title{Convert Giotto to Seurat V5}
+\usage{
+giottoToSeuratV5(gobject, spat_unit = NULL, obj_use = NULL, ...)
+}
+\arguments{
+\item{gobject}{Giotto object}
+
+\item{spat_unit}{spatial unit (e.g. 'cell')}
+
+\item{obj_use}{Giotto object (deprecated, use gobject)}
+
+\item{...}{additional params to pass to \code{\link{get_spatial_locations}}}
+}
+\value{
+Seurat object
+}
+\description{
+Converts a Giotto object into a Seurat object. This function extracts
+specific sets of data belonging to the specified spatial unit.
+The default values are 'cell' and 'rna', respectively.
+}
diff --git a/man/install_github_link_pip.Rd b/man/install_github_link_pip.Rd
new file mode 100644
index 000000000..ca38e1650
--- /dev/null
+++ b/man/install_github_link_pip.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/python_environment.R
+\name{install_github_link_pip}
+\alias{install_github_link_pip}
+\title{Install Package from GitHub Link}
+\usage{
+install_github_link_pip(link = NULL, env = NULL)
+}
+\arguments{
+\item{link}{link to github repository containing a python package,
+e.g. `git+https://github.com/TencentAILabHealthcare/pysodb.git`}
+
+\item{env}{conda environment to which `link` will be installed via pip}
+}
+\description{
+Installs `link` to python `env`
+}
+\keyword{internal}
diff --git a/man/install_py_pkg_reticulate.Rd b/man/install_py_pkg_reticulate.Rd
new file mode 100644
index 000000000..9759ee64e
--- /dev/null
+++ b/man/install_py_pkg_reticulate.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/python_environment.R
+\name{install_py_pkg_reticulate}
+\alias{install_py_pkg_reticulate}
+\title{Install Python Package with Reticulate}
+\usage{
+install_py_pkg_reticulate(package = NULL, env = NULL)
+}
+\arguments{
+\item{package}{name of python package}
+
+\item{env}{name of the environment into which the python
+package should be installed.}
+}
+\description{
+Install Python Package with Reticulate
+}
+\details{
+Installs `package` to python `env` after prompting user.
+Installation is done via `py_install` from the
+`reticulate` package.
+}
+\keyword{internal}
diff --git a/man/py_install_prompt.Rd b/man/py_install_prompt.Rd
new file mode 100644
index 000000000..4aba3e353
--- /dev/null
+++ b/man/py_install_prompt.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/python_environment.R
+\name{py_install_prompt}
+\alias{py_install_prompt}
+\title{Prompt User for Python Install}
+\usage{
+py_install_prompt(package = NULL, env = NULL)
+}
+\arguments{
+\item{package}{python package/github url}
+
+\item{env}{environment into which package will be installed}
+}
+\description{
+prompts user to install a package
+}
+\keyword{internal}
diff --git a/man/runPCA.Rd b/man/runPCA.Rd
index cd873d887..04463b6cd 100644
--- a/man/runPCA.Rd
+++ b/man/runPCA.Rd
@@ -18,7 +18,7 @@ runPCA(
   scale_unit = TRUE,
   ncp = 100,
   method = c("irlba", "exact", "random", "factominer"),
-  method_params = list(NA),
+  method_params = BiocParallel::SerialParam(),
   rev = FALSE,
   set_seed = TRUE,
   seed_number = 1234,
@@ -53,7 +53,7 @@ runPCA(
 
 \item{method}{which implementation to use}
 
-\item{method_params}{additional parameters}
+\item{method_params}{BiocParallelParam object}
 
 \item{rev}{do a reverse PCA}
diff --git a/man/runPCA_BiocSingular.Rd b/man/runPCA_BiocSingular.Rd
index 5eed4ec01..5a0fabf4a 100644
--- a/man/runPCA_BiocSingular.Rd
+++ b/man/runPCA_BiocSingular.Rd
@@ -13,7 +13,7 @@ runPCA_BiocSingular(
   set_seed = TRUE,
   seed_number = 1234,
   BSPARAM = c("irlba", "exact", "random"),
-  BSParameters = list(NA),
+  BPPARAM = BiocParallel::SerialParam(),
   ...
 )
 }
@@ -34,7 +34,7 @@ runPCA_BiocSingular(
 
 \item{BSPARAM}{method to use}
 
-\item{BSParameters}{additonal parameters for method}
+\item{BPPARAM}{BiocParallelParam object}
 }
 \value{
 list of eigenvalues, loadings and pca coordinates
diff --git a/man/runPCA_BiocSingular_irlba_projection.Rd b/man/runPCA_BiocSingular_irlba_projection.Rd
new file mode 100644
index 000000000..c15befc14
--- /dev/null
+++ b/man/runPCA_BiocSingular_irlba_projection.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dimension_reduction.R
+\name{runPCA_BiocSingular_irlba_projection}
+\alias{runPCA_BiocSingular_irlba_projection}
+\title{runPCA_BiocSingular_irlba_projection}
+\usage{
+runPCA_BiocSingular_irlba_projection(
+  x,
+  ncp = 100,
+  center = TRUE,
+  scale = TRUE,
+  rev = FALSE,
+  set_seed = TRUE,
+  seed_number = 1234,
+  BPPARAM = BiocParallel::SerialParam(),
+  random_subset = 500,
+  verbose = TRUE,
+  ...
+) +} +\arguments{ +\item{x}{matrix or object that can be converted to matrix} + +\item{ncp}{number of principal components to calculate} + +\item{center}{center the matrix before pca} + +\item{scale}{scale features} + +\item{rev}{reverse PCA} + +\item{set_seed}{use of seed} + +\item{seed_number}{seed number to use} + +\item{BPPARAM}{BiocParallelParam object} + +\item{random_subset}{random subset to perform PCA on} + +\item{verbose}{verbosity level} +} +\value{ +list of eigenvalues, loadings and pca coordinates +} +\description{ +Performs PCA based on the biocSingular package on a +subset of the matrix. It uses the obtained loadings to predicted coordinates +for the remaining matrix. +} +\keyword{internal} diff --git a/man/runPCAprojection.Rd b/man/runPCAprojection.Rd new file mode 100644 index 000000000..43bcdc48c --- /dev/null +++ b/man/runPCAprojection.Rd @@ -0,0 +1,87 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dimension_reduction.R +\name{runPCAprojection} +\alias{runPCAprojection} +\title{runPCAprojection} +\usage{ +runPCAprojection( + gobject, + spat_unit = NULL, + feat_type = NULL, + expression_values = c("normalized", "scaled", "custom"), + reduction = c("cells", "feats"), + random_subset = 500, + name = "pca.projection", + feats_to_use = "hvf", + return_gobject = TRUE, + center = TRUE, + scale_unit = TRUE, + ncp = 100, + method = c("irlba"), + method_params = BiocParallel::SerialParam(), + rev = FALSE, + set_seed = TRUE, + seed_number = 1234, + verbose = TRUE, + ... +) +} +\arguments{ +\item{gobject}{giotto object} + +\item{spat_unit}{spatial unit} + +\item{feat_type}{feature type} + +\item{expression_values}{expression values to use} + +\item{reduction}{cells or genes} + +\item{random_subset}{random subset to perform PCA on} + +\item{name}{arbitrary name for PCA run} + +\item{feats_to_use}{subset of features to use for PCA} + +\item{return_gobject}{boolean: return giotto object (default = TRUE)} + +\item{center}{center data first (default = TRUE)} + +\item{scale_unit}{scale features before PCA (default = TRUE)} + +\item{ncp}{number of principal components to calculate} + +\item{method}{which implementation to use} + +\item{method_params}{BiocParallelParam object} + +\item{rev}{do a reverse PCA} + +\item{set_seed}{use of seed} + +\item{seed_number}{seed number to use} + +\item{verbose}{verbosity of the function} + +\item{...}{additional parameters for PCA (see details)} + +\item{genes_to_use}{deprecated use feats_to_use} +} +\value{ +giotto object with updated PCA dimension recuction +} +\description{ +runs a Principal Component Analysis on a random subet + projection +} +\details{ +See \code{\link[BiocSingular]{runPCA}} and \code{\link[FactoMineR]{PCA}} for more information about other parameters. +This PCA implementation is similar to \code{\link{runPCA}}, except that it +performs PCA on a subset of the cells or features, and predict on the others. +This can significantly increase speed without sacrificing accuracy too much. 
diff --git a/man/runPCAprojectionBatch.Rd b/man/runPCAprojectionBatch.Rd
new file mode 100644
index 000000000..2533e097c
--- /dev/null
+++ b/man/runPCAprojectionBatch.Rd
@@ -0,0 +1,90 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dimension_reduction.R
+\name{runPCAprojectionBatch}
+\alias{runPCAprojectionBatch}
+\title{runPCAprojectionBatch}
+\usage{
+runPCAprojectionBatch(
+  gobject,
+  spat_unit = NULL,
+  feat_type = NULL,
+  expression_values = c("normalized", "scaled", "custom"),
+  reduction = c("cells", "feats"),
+  random_subset = 500,
+  batch_number = 5,
+  name = "pca.projection.batch",
+  feats_to_use = "hvf",
+  return_gobject = TRUE,
+  center = TRUE,
+  scale_unit = TRUE,
+  ncp = 100,
+  method = c("irlba"),
+  method_params = BiocParallel::SerialParam(),
+  rev = FALSE,
+  set_seed = TRUE,
+  seed_number = 1234,
+  verbose = TRUE,
+  ...
+)
+}
+\arguments{
+\item{gobject}{giotto object}
+
+\item{spat_unit}{spatial unit}
+
+\item{feat_type}{feature type}
+
+\item{expression_values}{expression values to use}
+
+\item{reduction}{cells or genes}
+
+\item{random_subset}{random subset to perform PCA on}
+
+\item{batch_number}{number of random batches to run}
+
+\item{name}{arbitrary name for PCA run}
+
+\item{feats_to_use}{subset of features to use for PCA}
+
+\item{return_gobject}{boolean: return giotto object (default = TRUE)}
+
+\item{center}{center data first (default = TRUE)}
+
+\item{scale_unit}{scale features before PCA (default = TRUE)}
+
+\item{ncp}{number of principal components to calculate}
+
+\item{method}{which implementation to use}
+
+\item{method_params}{BiocParallelParam object}
+
+\item{rev}{do a reverse PCA}
+
+\item{set_seed}{use of seed}
+
+\item{seed_number}{seed number to use}
+
+\item{verbose}{verbosity of the function}
+
+\item{...}{additional parameters for PCA (see details)}
+
+\item{genes_to_use}{deprecated, use feats_to_use}
+}
+\value{
+giotto object with updated PCA dimension reduction
+}
+\description{
+runs a Principal Component Analysis on multiple random batches + projection
+}
+\details{
+See \code{\link[BiocSingular]{runPCA}} and \code{\link[FactoMineR]{PCA}} for more information about other parameters.
+This PCA implementation is similar to \code{\link{runPCA}} and \code{\link{runPCAprojection}},
+except that it performs PCA on multiple subsets (batches) of the cells or features,
+and predicts on the others. This can significantly increase speed without sacrificing accuracy too much.
+\itemize{
+  \item feats_to_use = NULL: will use all features from the selected matrix
+  \item feats_to_use = 'hvf': can be used to select a column name of
+  highly variable features, created by \code{\link{calculateHVF}}
+  \item feats_to_use = c('geneA', 'geneB', ...): will use all manually provided features
+}
+}
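+\examples{
+\dontrun{
+# Illustrative sketch (assumes a normalized Giotto object `g` with an "hvf"
+# feature metadata column): PCA is fit on multiple random subsets of 500
+# cells and the results are used to project the remaining cells.
+g <- runPCAprojectionBatch(
+  gobject = g,
+  random_subset = 500,
+  batch_number = 5,
+  feats_to_use = "hvf",
+  name = "pca.projection.batch")
+}
+}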
diff --git a/man/runUMAPprojection.Rd b/man/runUMAPprojection.Rd
new file mode 100644
index 000000000..454cb393f
--- /dev/null
+++ b/man/runUMAPprojection.Rd
@@ -0,0 +1,97 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dimension_reduction.R
+\name{runUMAPprojection}
+\alias{runUMAPprojection}
+\title{Run UMAP dimension reduction}
+\usage{
+runUMAPprojection(
+  gobject,
+  feat_type = NULL,
+  spat_unit = NULL,
+  expression_values = c("normalized", "scaled", "custom"),
+  reduction = c("cells", "feats"),
+  dim_reduction_to_use = "pca",
+  dim_reduction_name = NULL,
+  dimensions_to_use = 1:10,
+  random_subset = 500,
+  name = NULL,
+  feats_to_use = NULL,
+  return_gobject = TRUE,
+  n_neighbors = 40,
+  n_components = 2,
+  n_epochs = 400,
+  min_dist = 0.01,
+  n_threads = NA,
+  spread = 5,
+  set_seed = TRUE,
+  seed_number = 1234,
+  verbose = T,
+  toplevel_params = 2,
+  ...
+)
+}
+\arguments{
+\item{gobject}{giotto object}
+
+\item{feat_type}{feature type}
+
+\item{spat_unit}{spatial unit}
+
+\item{expression_values}{expression values to use}
+
+\item{reduction}{cells or genes}
+
+\item{dim_reduction_to_use}{use another dimension reduction set as input}
+
+\item{dim_reduction_name}{name of dimension reduction set to use}
+
+\item{dimensions_to_use}{number of dimensions to use as input}
+
+\item{random_subset}{random subset to perform UMAP on}
+
+\item{name}{arbitrary name for UMAP run}
+
+\item{feats_to_use}{if dim_reduction_to_use = NULL, which genes to use}
+
+\item{return_gobject}{boolean: return giotto object (default = TRUE)}
+
+\item{n_neighbors}{UMAP param: number of neighbors}
+
+\item{n_components}{UMAP param: number of components}
+
+\item{n_epochs}{UMAP param: number of epochs}
+
+\item{min_dist}{UMAP param: minimum distance}
+
+\item{n_threads}{UMAP param: threads/cores to use}
+
+\item{spread}{UMAP param: spread}
+
+\item{set_seed}{use of seed}
+
+\item{seed_number}{seed number to use}
+
+\item{verbose}{verbosity of function}
+
+\item{toplevel_params}{parameters to extract}
+
+\item{...}{additional UMAP parameters}
+
+\item{genes_to_use}{deprecated, use feats_to_use}
+}
+\value{
+giotto object with updated UMAP dimension reduction
+}
+\description{
+run UMAP on a subset and project on the rest
+}
+\details{
+See \code{\link[uwot]{umap}} for more information about these and other parameters.
+\itemize{
+  \item Input for UMAP dimension reduction can be another dimension reduction (default = 'pca')
+  \item To use gene expression as input set dim_reduction_to_use = NULL
+  \item If dim_reduction_to_use = NULL, genes_to_use can be used to select a column name of
+  highly variable genes (see \code{\link{calculateHVF}}) or simply provide a vector of genes
+  \item multiple UMAP results can be stored by changing the \emph{name} of the analysis
+}
+}
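+\examples{
+\dontrun{
+# Illustrative sketch (assumes `g` already has a PCA reduction named "pca"):
+# UMAP is fit on 500 random cells and the remaining cells are projected
+# onto that embedding.
+g <- runUMAPprojection(
+  gobject = g,
+  dim_reduction_to_use = "pca",
+  dim_reduction_name = "pca",
+  dimensions_to_use = 1:10,
+  random_subset = 500,
+  name = "umap.projection")
+}
+}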
diff --git a/man/seuratToGiotto.Rd b/man/seuratToGiottoV4.Rd
similarity index 81%
rename from man/seuratToGiotto.Rd
rename to man/seuratToGiottoV4.Rd
index 610596f53..914434fdd 100644
--- a/man/seuratToGiotto.Rd
+++ b/man/seuratToGiottoV4.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/interoperability.R
-\name{seuratToGiotto}
-\alias{seuratToGiotto}
-\title{Convert a Seurat object to a Giotto object}
+\name{seuratToGiottoV4}
+\alias{seuratToGiottoV4}
+\title{Convert a Seurat V4 object to a Giotto object}
 \usage{
-seuratToGiotto(
+seuratToGiottoV4(
   sobject,
   spatial_assay = "Spatial",
   dim_reduction = c("pca", "umap"),
@@ -26,5 +26,5 @@ Default is \code{"Vizgen"}.}
 A Giotto object converted from Seurat object with all computations stored in it.
 }
 \description{
-Convert a Seurat object to a Giotto object
+Convert a Seurat V4 object to a Giotto object
 }
diff --git a/man/seuratToGiottoV5.Rd b/man/seuratToGiottoV5.Rd
new file mode 100644
index 000000000..761617f77
--- /dev/null
+++ b/man/seuratToGiottoV5.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/interoperability.R
+\name{seuratToGiottoV5}
+\alias{seuratToGiottoV5}
+\title{Convert a Seurat V5 object to a Giotto object}
+\usage{
+seuratToGiottoV5(
+  sobject,
+  spatial_assay = "Spatial",
+  dim_reduction = c("pca", "umap"),
+  subcellular_assay = "Vizgen",
+  spatial_loc = "spatial"
+)
+}
+\arguments{
+\item{sobject}{Input Seurat object to convert to Giotto object}
+
+\item{spatial_assay}{Specify name of the spatial assay slot in Seurat. Default is \code{"Spatial"}.}
+
+\item{dim_reduction}{Specify which dimensional reduction computations to fetch from
+input Seurat object. Default is \code{"c('pca', 'umap')"}.}
+
+\item{subcellular_assay}{Specify name of the subcellular assay in input object.
+Default is \code{"Vizgen"}.}
+
+\item{spatial_loc}{Specify name of the spatial locations slot in the input object.
+Default is \code{"spatial"}.}
+}
+\value{
+A Giotto object converted from Seurat object with all computations stored in it.
+}
+\description{
+Convert a Seurat V5 object to a Giotto object
+}
diff --git a/man/seuratToGiotto_OLD.Rd b/man/seuratToGiotto_OLD.Rd
deleted file mode 100644
index 45cb5266e..000000000
--- a/man/seuratToGiotto_OLD.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/interoperability.R
-\name{seuratToGiotto_OLD}
-\alias{seuratToGiotto_OLD}
-\title{seuratToGiotto_OLD}
-\usage{
-seuratToGiotto_OLD(obj_use = NULL, ...)
-}
-\arguments{
-\item{obj_use}{Seurat object}
-
-\item{...}{additional params to pass}
-}
-\value{
-Giotto object
-}
-\description{
-Converts Seurat object into a Giotto object. Deprecated, see \code{\link{giottoToSeurat}}
-}
diff --git a/man/spatialExperimentToGiotto.Rd b/man/spatialExperimentToGiotto.Rd
index 44cf8c10e..ee4c49d7f 100644
--- a/man/spatialExperimentToGiotto.Rd
+++ b/man/spatialExperimentToGiotto.Rd
@@ -23,7 +23,7 @@ all existing networks.}
 
 \item{sp_network}{Specify the name of the spatial network(s) in the input
 SpatialExperiment object. Default \code{NULL} will use all existing
-networks.}
+networks. This can be a vector of multiple network names.}
 
 \item{verbose}{A boolean value specifying if progress messages should be
 displayed or not. Default \code{TRUE}.}
diff --git a/man/spdepAutoCorr.Rd b/man/spdepAutoCorr.Rd
new file mode 100644
index 000000000..0e23d3f0b
--- /dev/null
+++ b/man/spdepAutoCorr.Rd
@@ -0,0 +1,48 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/spdep.R
+\name{spdepAutoCorr}
+\alias{spdepAutoCorr}
+\title{Compute spatial autocorrelation using spdep}
+\usage{
+spdepAutoCorr(
+  gobject,
+  method = c("geary.test", "lee.test", "lm.morantest", "moran.test"),
+  spat_unit = NULL,
+  feat_type = NULL,
+  expression_values = "normalized",
+  spatial_network_to_use = "spatial_network",
+  return_gobject = FALSE,
+  verbose = FALSE
+)
+}
+\arguments{
+\item{gobject}{Input a Giotto object.}
+
+\item{method}{Specify a method name to compute autocorrelation.
+Available methods include \code{"geary.test", "lee.test", "lm.morantest", "moran.test"}.}
+
+\item{spat_unit}{spatial unit}
+
+\item{feat_type}{feature type}
+
+\item{expression_values}{expression values to use, default = normalized}
+
+\item{spatial_network_to_use}{spatial network to use, default = spatial_network}
+
+\item{return_gobject}{if TRUE, append the results to the feature metadata
+and return the giotto object; if FALSE (default), return a data.table}
+
+\item{verbose}{be verbose}
+}
+\value{
+A data table with computed values for each feature.
+}
+\description{
+Compute spatial autocorrelation using spdep
+}
+\examples{
+\dontrun{
+library(Giotto)
+library(GiottoData)
+library(spdep)
+g = loadGiottoMini(dataset = "visium")
+spdepAutoCorr(gobject = g, method = "moran.test")
+}
+}