emosca-cnr
diff --git a/‎DESCRIPTION
Lines changed: 4 additions & 32 deletions b/‎DESCRIPTION
Lines changed: 4 additions & 32 deletions
diff --git a/‎NAMESPACE
Lines changed: 33 additions & 8 deletions b/‎NAMESPACE
Lines changed: 33 additions & 8 deletions
diff --git a/‎NEWS
Lines changed: 3 additions & 0 deletions b/‎NEWS
Lines changed: 3 additions & 0 deletions
diff --git a/‎R/barplot_cluster.R
Lines changed: 3 additions & 2 deletions b/‎R/barplot_cluster.R
Lines changed: 3 additions & 2 deletions
diff --git a/‎R/boxplot_cluster.R
Lines changed: 3 additions & 3 deletions b/‎R/boxplot_cluster.R
Lines changed: 3 additions & 3 deletions
diff --git a/‎R/boxplot_points.R
Lines changed: 1 addition & 1 deletion b/‎R/boxplot_points.R
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/calculate_gs_scores.R
Lines changed: 10 additions & 4 deletions b/‎R/calculate_gs_scores.R
Lines changed: 10 additions & 4 deletions
diff --git a/‎R/calculate_gs_scores_in_clusters.R
Lines changed: 2 additions & 3 deletions b/‎R/calculate_gs_scores_in_clusters.R
Lines changed: 2 additions & 3 deletions
diff --git a/‎R/cell_coocc_partitions.R
Lines changed: 19 additions & 0 deletions b/‎R/cell_coocc_partitions.R
Lines changed: 19 additions & 0 deletions
diff --git a/‎R/cluster_by_features.R
Lines changed: 1 addition & 1 deletion b/‎R/cluster_by_features.R
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/csea.R
Lines changed: 1 addition & 1 deletion b/‎R/csea.R
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/gs_scores_in_clusters.R
Lines changed: 6 additions & 2 deletions b/‎R/gs_scores_in_clusters.R
Lines changed: 6 additions & 2 deletions
diff --git a/‎R/heatmap_CNV.R
Lines changed: 2 additions & 2 deletions b/‎R/heatmap_CNV.R
Lines changed: 2 additions & 2 deletions
diff --git a/‎R/inter_datasets_comparison.R
Lines changed: 3 additions & 3 deletions b/‎R/inter_datasets_comparison.R
Lines changed: 3 additions & 3 deletions
@@ -1,7 +1,7 @@
 Package: scMuffin
 Title: MUlti-Features INtegrative approach for single-cell data analysis
-Version: 1.1.7
-Date: 2024-02-26
+Version: 1.2.0
+Date: 2024-04-10
 Authors@R: 
      c(person(given = "Valentina",
            family = "Nale",
@@ -30,34 +30,6 @@ RoxygenNote: 7.2.3
 Encoding: UTF-8
 Depends: 
     R (>= 4.0.0)
-Imports:
-    graphics,
-    parallel,
-    org.Hs.eg.db,
-    RColorBrewer,
-    dendextend,
-    destiny,
-    Seurat,
-    stats,
-    utils,
-    DESeq2,
-    msigdbr,
-    methods,
-    cluster,
-    ComplexHeatmap,
-    VGAM,
-    dendsort,
-    grDevices,
-    ggplot2,
-    circlize,
-    Matrix,
-    pals,
-    plotrix,
-    qvalue
-Suggests: 
-    rmarkdown,
-    knitr,
-    BiocStyle,
-    devtools,
-    sf
+Imports: graphics, parallel, org.Hs.eg.db, destiny, Seurat, stats, utils, msigdbr, methods, ComplexHeatmap, grDevices, ggplot2, circlize, Matrix, pals, plotrix, qvalue
+Suggests: rmarkdown, knitr, BiocStyle, devtools
 VignetteBuilder: knitr
@@ -12,6 +12,7 @@ export(boxplot_points)
 export(calculate_CNV)
 export(calculate_gs_scores)
 export(calculate_gs_scores_in_clusters)
+export(cel_coocc_partitions)
 export(cluster_by_features)
 export(cluster_csea)
 export(cluster_hyper)
@@ -29,7 +30,6 @@ export(gs_score)
 export(gs_scores_in_clusters)
 export(heatmap_CNV)
 export(inter_dataset_comparison)
-export(inter_ds_cluster_sim)
 export(keep_strongest_representative)
 export(ora)
 export(overlap_coefficient)
@@ -52,22 +52,26 @@ export(sc_create_null)
 export(sc_data_bin)
 export(show_tissues)
 export(transcr_compl)
-import(ComplexHeatmap)
-import(Seurat)
-import(grDevices)
-import(graphics)
-import(grid)
-import(pals)
-import(parallel)
+importFrom(ComplexHeatmap,Heatmap)
+importFrom(ComplexHeatmap,anno_mark)
+importFrom(ComplexHeatmap,columnAnnotation)
+importFrom(ComplexHeatmap,draw)
+importFrom(ComplexHeatmap,rowAnnotation)
 importFrom(Matrix,Matrix)
 importFrom(Matrix,colMeans)
 importFrom(Matrix,colSums)
 importFrom(Matrix,rowSums)
 importFrom(Seurat,AddMetaData)
+importFrom(Seurat,CreateSeuratObject)
 importFrom(Seurat,DimPlot)
 importFrom(Seurat,FeaturePlot)
 importFrom(Seurat,FetchData)
+importFrom(Seurat,FindClusters)
+importFrom(Seurat,FindNeighbors)
+importFrom(Seurat,GetAssayData)
 importFrom(Seurat,NormalizeData)
+importFrom(Seurat,RunPCA)
+importFrom(Seurat,ScaleData)
 importFrom(circlize,colorRamp2)
 importFrom(destiny,DPT)
 importFrom(destiny,DiffusionMap)
@@ -77,17 +81,38 @@ importFrom(ggplot2,cut_interval)
 importFrom(ggplot2,cut_number)
 importFrom(ggplot2,element_text)
 importFrom(ggplot2,theme)
+importFrom(grDevices,adjustcolor)
 importFrom(grDevices,boxplot.stats)
+importFrom(grDevices,dev.off)
 importFrom(grDevices,jpeg)
 importFrom(grDevices,png)
+importFrom(graphics,abline)
+importFrom(graphics,axis)
+importFrom(graphics,barplot)
+importFrom(graphics,boxplot)
+importFrom(graphics,layout)
+importFrom(graphics,legend)
+importFrom(graphics,par)
+importFrom(graphics,plot.new)
+importFrom(graphics,points)
+importFrom(graphics,rect)
+importFrom(graphics,text)
 importFrom(grid,gpar)
+importFrom(grid,grid.text)
+importFrom(grid,unit)
 importFrom(methods,is)
 importFrom(msigdbr,msigdbr)
 importFrom(org.Hs.eg.db,org.Hs.egCHRLOC)
 importFrom(org.Hs.eg.db,org.Hs.egCHRLOCEND)
 importFrom(org.Hs.eg.db,org.Hs.egENSEMBL)
 importFrom(org.Hs.eg.db,org.Hs.egSYMBOL)
 importFrom(pals,alphabet)
+importFrom(pals,alphabet2)
+importFrom(pals,brewer.accent)
+importFrom(pals,brewer.pastel1)
+importFrom(pals,brewer.purples)
+importFrom(pals,brewer.rdylbu)
+importFrom(pals,brewer.ylorrd)
 importFrom(parallel,mclapply)
 importFrom(plotrix,thigmophobe.labels)
 importFrom(qvalue,qvalue)
 
@@ -1,3 +1,6 @@
+version 1.2.0
+- removed null_model as argument of calculate_gs_scores;
+- added the function cell_coocc_partitions
 version 1.1.7
 - introduced support for NA values in keep strongest representative
 version 1.1.6
 
@@ -12,10 +12,11 @@
 #' @param units image units
 #' @param res image resolution
 #' @param legend.pos position of barplot legend
-#' @importFrom grDevices jpeg
+#' @importFrom grDevices jpeg adjustcolor
 #' @importFrom stats setNames
 #' @importFrom plotrix thigmophobe.labels
-#' @import pals
+#' @importFrom pals alphabet2
+#' @importFrom graphics barplot text plot.new
 #' @description Produce barplots (1 for each cluster) of distribution of cells associated with the values of the selected feature. A png figure for each cluster is saved in dir_out.
 #' @export
 
 
@@ -12,9 +12,9 @@
 #' @param res image resolution
 #' @param feature_name the names of the feature that should be considered. It must be one of names(scMuffinList)
 #' @description Produce boxplots to visualize the distribution of cell values according to the selected feature A png figure for each cluster is saved in dir_out.
-#' @importFrom grDevices png
+#' @importFrom grDevices png dev.off
 #' @importFrom plotrix thigmophobe.labels
-#' @import graphics
+#' @importFrom graphics layout par points boxplot legend
 #' @export
 
 boxplot_cluster <- function(scMuffinList=NULL, feature_name=NULL, partition_id=NULL, dir_out="./", n_features=10, only_pos_nes=TRUE, do_scale_features=FALSE, cex.axis=0.8, width=180, height=180, units="mm", res=300){
@@ -92,7 +92,7 @@ boxplot_cluster <- function(scMuffinList=NULL, feature_name=NULL, partition_id=N
 			cbf_cl <- cells_by_features[, match(names(top_features$fdr), colnames(cells_by_features)), drop=FALSE]
 
 
-			png(paste0(dir_out, "/cluster_", cell_clusters_set[cl], ".png"), width=width, height=height, units=units, res=res)
+			png(file.path(dir_out, paste0("cluster_", cell_clusters_set[cl], ".png")), width=width, height=height, units=units, res=res)
 			layout(matrix(c(1, 1, 2, 3), nrow = 2, byrow = T))
 
 			par(mar = c(3, 3, 2, 1))
 
@@ -15,7 +15,7 @@
 #' @param image_format png or jpeg
 #' @param ... further argument to boxplot
 #' @importFrom pals alphabet
-#' @importFrom grDevices png jpeg
+#' @importFrom grDevices png jpeg adjustcolor
 #' @export
 
 boxplot_points <- function(x=NULL, f=NULL, col=NULL, amount=0.2, adj.col=1, pch=16, cex=0.6, ylim=NULL, file=NULL, width=180, height=180, units="mm", res=300, image_format="png", ...){
 
@@ -9,19 +9,20 @@
 #' @param k number of permutations
 #' @param kmin minimum number of permutations; due to missing values it is hard to ensure that a gene set score can be compared to k permutations in every cell
 #' @param score_type type of score. if "relative", than the score is the difference between the observed gene set average expression and that of a k permutations; if "mean" the score is equal to the observed gene set average expression
-#' @param null_model TRUE if permutations have to be used. Required for score_type="relative"
 #' @param verbose verbosity
 #' @param na.rm whether to use NA or not
 #' @param overwrite whether to update or not gene_set_scoring and gene_set_scoring_full elements of scMuffinList. 
 #' @return scMuffinList with element gene_set_scoring, a list that contains summary and full. The element summary contains a cells-by-gene sets data.frame. The element "full" contains a data.frame for each gene set. See [gs_score()] for further details.
 #' @references Tirosh2016 10.1126/science.aad0501
 #' 
 #' @export
-#' @import parallel
+#' @importFrom parallel mclapply
 
-calculate_gs_scores <- function(scMuffinList=NULL, gs_list=NULL, mc.cores=1, nbins=25, nmark_min = 5, ncells_min = 10, k=100, kmin=50, score_type=c("relative", "mean"), null_model=TRUE, verbose=FALSE, na.rm=TRUE, overwrite=FALSE){
+calculate_gs_scores <- function(scMuffinList=NULL, gs_list=NULL, mc.cores=1, nbins=25, nmark_min = 5, ncells_min = 10, k=100, kmin=50, score_type=c("relative", "mean"), verbose=FALSE, na.rm=TRUE, overwrite=FALSE){
 
 
+  score_type <- match.arg(arg = score_type, choices = c("relative", "mean"))
+  
   cat("####################################################\n")
   cat("nbins:", nbins, "\n")
   cat("nmark_min:", nmark_min, "\n")
@@ -30,12 +31,17 @@ calculate_gs_scores <- function(scMuffinList=NULL, gs_list=NULL, mc.cores=1, nbi
   kmin <- min(k, kmin)
   cat("kmin:", kmin, "\n")
   cat("score_type:", score_type, "\n")
-  cat("null_model:", null_model, "\n")
   cat("verbose:", verbose, "\n")
   cat("na.rm:", na.rm, "\n")
   cat("overwrite:", overwrite, "\n")
   cat("####################################################\n")
 
+  if(score_type == "relative"){
+    null_model <- TRUE
+  }else{
+    null_model <- FALSE
+  }
+
   if(length(scMuffinList$normalized)==0){
     stop("scMuffinList does not contain genes_by_cells\n")
   }
 
@@ -3,14 +3,13 @@
 #' @param scMuffinList scMuffinList object
 #' @param partition_id identifier of the partition to be used
 #' @param ncells_min minimum number of cells required for the calculation of the average signature in the cluster
-#' @param null_model TRUE to consider the empirical null based on gene set permutations
 #' @param alt alterative passed to [wilcox.test()] or [t.test()]
 #' @param test type of test: t to use [t.test()]; wrs to use [wilcox.test()]
 #' @param fract_min only clusters with this fraction of cells with not null gene set score will be considered
 #' @return scMuffinList with cluster level scores in `sMuffinList$cluster_data[[partition_id]]`. The element [summary] contains a clusters-by-gene sets table, while the element [full] the full result
 #' @export
 
-calculate_gs_scores_in_clusters <- function(scMuffinList=NULL, partition_id=NULL, ncells_min = 5, null_model = TRUE, alt="g", test="t", fract_min=0.5){
+calculate_gs_scores_in_clusters <- function(scMuffinList=NULL, partition_id=NULL, ncells_min = 5, alt="g", test="t", fract_min=0.5){
 
   if(!any(colnames(scMuffinList$partitions) == partition_id)){
     stop("Can't find any parition named ", partition_id, "\n")
@@ -19,7 +18,7 @@ calculate_gs_scores_in_clusters <- function(scMuffinList=NULL, partition_id=NULL
   cat("Clusters...\n")
   print(table(setNames(scMuffinList$partitions[, partition_id], rownames(scMuffinList$partitions))))
 
-  res_signatures_clusters <- lapply(scMuffinList$gene_set_scoring$full, function(i_marker_res) gs_scores_in_clusters(i_marker_res, cell_clusters=setNames(scMuffinList$partitions[, partition_id], rownames(scMuffinList$partitions)), ncells_min = ncells_min, fract_min = fract_min, null_model = null_model, alt=alt, test=test))
+  res_signatures_clusters <- lapply(scMuffinList$gene_set_scoring$full, function(i_marker_res) gs_scores_in_clusters(i_marker_res, cell_clusters=setNames(scMuffinList$partitions[, partition_id], rownames(scMuffinList$partitions)), ncells_min = ncells_min, fract_min = fract_min, alt=alt, test=test))
 
   #signatures-by-clusters matrix
   SC_signatures_by_cluster_matrix <- do.call(rbind, lapply(res_signatures_clusters, function(x) array(x$score[order(x$cluster)], dimnames = list(c(x$cluster[order(x$cluster)])))))
 
@@ -0,0 +1,19 @@
+#' Co-occurrence of cells between two partitions
+#' @description Calculate, as Jaccard index, the co-occurrence of cells in all the pairs of elements composed by an element from p1 and an element from p2.
+#' @param p1 a partition among scMuffinList$partitions, e.g. scML_demo$partitions$global_exp
+#' @param p2 a partition among scMuffinList$partitions, e.g. scML_demo$partitions$CNV; it must have the same length as p1
+#' @return matrix of Jaccard indices between all levels of p1 (rows) and all levels of p2 (columns); pairs whose index is undefined (0/0) are set to 0
+#' @export
+
+cel_coocc_partitions <- function(p1=NULL, p2=NULL){
+	
+	# fail early: with the NULL defaults the length check alone would pass (0 == 0)
+	# and table() would then stop with an unrelated message
+	stopifnot(!is.null(p1), !is.null(p2), length(p1) == length(p2))
+	
+	# contingency table: number of cells shared by each (p1 level, p2 level) pair
+	p1_by_p2 <- table(p1, p2)
+	
+	# |A| + |B| for every (row, column) pair
+	size_sum <- outer(rowSums(p1_by_p2), colSums(p1_by_p2), "+")
+	
+	# Jaccard index = |A and B| / (|A| + |B| - |A and B|)
+	p1_by_p2 <- p1_by_p2 / (size_sum - p1_by_p2)
+	
+	# the denominator is zero only when both levels are empty, which gives 0/0
+	p1_by_p2[is.nan(p1_by_p2)] <- 0
+	
+	return(p1_by_p2)
+}
@@ -5,7 +5,7 @@
 #' @param n_comp numeric, Dimensions of reduction to use as input
 #' @param ... arguments passed to Seurat::FindCLusters
 #' @return features_by_cells Seurat Object, object with saved dimension reduction components calculate on features by cells matrix
-#' @import Seurat
+#' @importFrom Seurat CreateSeuratObject ScaleData RunPCA FindNeighbors FindClusters
 #' @export
 
 cluster_by_features <- function(features_by_cells=NULL, n_comp = 10, ...){
 
@@ -8,7 +8,7 @@
 #' @param min.k minimum number of valid permutations to support empirical nulls
 #' @param min.size minimum number of cells with a not null value
 #' @param fract_min only cluster of size less or equal to this fraction of cell with not null feature values will be analysed
-#' @import parallel
+#' @importFrom parallel mclapply
 #' @importFrom stats p.adjust
 #' @importFrom qvalue qvalue
 #' @return list with two data.frames, gs_table and leading_edge. 
 
@@ -4,18 +4,22 @@
 #' @param ncells_min minimum number of cells required for the calculation of the average signature in the cluster
 #' @param alt alterative passed to [wilcox.test()] or [t.test()]
 #' @param test type of test: t to use [t.test()]; wrs to use [wilcox.test()]
-#' @param null_model TRUE to consider the empirical null based on gene set permutations
 #' @param fract_min only clusters with this fraction of cells with not null gene set score will be considered
 #' @description Gene set scoring in clusters
 #' @importFrom stats median wilcox.test t.test p.adjust
 #' @export
 
-gs_scores_in_clusters <- function(score_table=NULL, cell_clusters=NULL, ncells_min=5, alt="g", test="t", null_model=TRUE, fract_min=0.5){
+gs_scores_in_clusters <- function(score_table=NULL, cell_clusters=NULL, ncells_min=5, alt="g", test="t", fract_min=0.5){
 
 	if(!is.factor(cell_clusters)){
 		cell_clusters <- as.factor(cell_clusters)
 	}
 
+	null_model <- FALSE
+	if(any(!is.na(score_table$avg_control))){
+		null_model <- TRUE
+	}
+	
   clusters_size <- table(cell_clusters)
 	clusters <- levels(cell_clusters)
 
 
@@ -13,8 +13,8 @@
 #' @param legend_fontsize legend fontsize
 #' @param genes.labels.fontsize gene labels fontsize
 #' @param ... arguments passed to ComplexHeatmap::Heatmap
-#' @import ComplexHeatmap 
-#' @importFrom grid gpar
+#' @importFrom ComplexHeatmap rowAnnotation anno_mark Heatmap draw
+#' @importFrom grid gpar unit
 #' @importFrom grDevices png
 #' @export
 
 
@@ -1,14 +1,14 @@
 #' Inter-dataset cluster similarity
 #' @param seu_obj_list list of Seurat objects
 #' @param gsl gene set list
-##' @param genes_min minimum number of genes required among the markers of a cluster
+#' @param genes_min minimum number of genes required among the markers of a cluster
 #' @param genes_max maximum number of genes required among the markers of a cluster
 #' @param mc.cores number of cores
-#' @param null_model whether to use or not the empirical null model. See calculate_signature
 #' @param ncells_min minim number of cells in a cluster 
 #' @param cluster_rows whether to cluster or not the rows
 #' @param cluster_columns whether to cluster or not the columns
 #' @param ... arguments passed to calculate_gs_scores
+#' @importFrom Seurat GetAssayData
 #' @export
 #' @description Quantify the similarity between clusters of two datasets, on the basis of the average cluster marker expression
 #' @return A list with:
@@ -20,7 +20,7 @@
 #'   \item{markers_2, markers of dataset 2;}
 #' }
 
-inter_dataset_comparison <- function(seu_obj_list=NULL, gsl=NULL, genes_min=3, genes_max=500, mc.cores=1, null_model=TRUE, ncells_min=5, cluster_rows = FALSE, cluster_columns = FALSE,  ...){
+inter_dataset_comparison <- function(seu_obj_list=NULL, gsl=NULL, genes_min=3, genes_max=500, mc.cores=1, ncells_min=5, cluster_rows = FALSE, cluster_columns = FALSE,  ...){
 
 
   if(!all(unlist(lapply(seu_obj_list, function(x) is(x, "Seurat"))))){