version 2.2

MaximilianLombardo · Sep 8, 2014 · b20ca75 · b20ca75
1 parent b8cc848
commit b20ca75
Show file tree

Hide file tree

Showing 42 changed files with 410 additions and 5 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,12 +1,13 @@
 Package: SNFtool
 Type: Package
 Title: Similarity Network Fusion
-Version: 2.1
-Date: 2014-03-03
+Version: 2.2
+Date: 2014-09-08
 Author: Bo Wang, Aziz Mezlini, Feyyaz Demir, Marc Fiume, Zhuowen Tu, Michael Brudno, Benjamin Haibe-Kains, Anna Goldenberg
 Maintainer: Bo Wang <wangbo.yunze@gmail.com>
+Imports: heatmap.plus
 Description: Similarity Network Fusion takes multiple views of a network and fuses them together to construct an overall status matrix. The input to our algorithm can be feature vectors, pairwise distances, or pairwise similarities. The learned status matrix can then be used for retrieval, clustering, and classification.
 License: GPL
 NeedsCompilation: no
 Repository: CRAN
-Date/Publication: 2014-03-29 07:07:26
+Date/Publication: 2014-09-08 23:16:27
diff --git a/MD5 b/MD5
@@ -1,15 +1,18 @@
-628cb77b4eef92bac57ab2d808ee6f29 *DESCRIPTION
-475c34d27f1672e1c78f3d106a30181d *NAMESPACE
+2cbc05552449767a216beabd3cb02466 *DESCRIPTION
+a4feb42a6a38007d2a5c4117e0dca30b *NAMESPACE
 33c2efd34842588cabbaa2093a626bec *R/SNF.R
 983fa3905510bf2a88a87184787fd3f2 *R/affinityMatrix.R
 ae81becbdae5319a83a7d605aa625731 *R/calNMI.R
 a41b1cd5bf978b9e41f1cc07d4d597c0 *R/chiDist2.R
 9cdac19e14e8dccec6260927c6664516 *R/concordanceNetworkNMI.R
 79ce3ab02edf49f3765d7871dd6ea4f9 *R/displayClusters.r
+f482df9c1d5c70578021bd0101edd42d *R/displayClustersWithHeatmap.R
 343e2499f550b4fc781ffd6fa2dcb7fe *R/dist2.R
 522fe75e8637797841b85ae880df2837 *R/estimateNumberOfClustersGivenGraph.R
+b5f1f9bc3a1862041060dccbe2d863c6 *R/getColorsForGroups.R
 44ee3fbf9afc1b0f8c51098e26bf4555 *R/groupPredict.r
 0d279ac05c961e781026fc3d3b813c51 *R/internal.R
+58e24ebd89cafb0631f6019c70e9bfb9 *R/rankFeaturesByNMI.R
 10d0f6c209f053da70f167f24eacec73 *R/spectralClustering.r
 d9650e21bdc929938a5f92bf095a4e0b *R/standardNormalization.R
 e88ad915b528b87fe9004ef3548bcedb *README
@@ -26,10 +29,13 @@ ec5e4c768e443d254d8557ef1727831e *man/chiDist2.Rd
 95155f9bb979fcc08ed699b12745de42 *man/concordanceNetworkNMI.Rd
 d0e3eac3c047860daf2aaea259afb73e *man/dataL.Rd
 104393adbf87931de1c40b5bdc3ab778 *man/displayClusters.Rd
+633dddcf86bd63c5463c9547c1ea45c9 *man/displayClustersWithHeatmap.Rd
 6d14c4a3610cee400c70a1e124375bc6 *man/dist2.Rd
 894fe28e8b787a3b794c252c34ded7ec *man/estimateNumberOfClustersGivenGraph.Rd
+ad5e6a359736d83195838004d156181d *man/getColorsForGroups.Rd
 299f2209cf51901f3af023792198e2c1 *man/groupPredict.Rd
 8f70b32428c5cca15926a128ed579ea1 *man/internal.Rd
 7975be6e72cc919a4860ef7b52a532b8 *man/label.Rd
+10461884faf38517a8f82b0c44739561 *man/rankFeaturesByNMI.Rd
 40053d56a0c0f5b58b9cb8c5e0951995 *man/spectralClustering.Rd
 167bf83b88be9e8bf404dcc41ab848d3 *man/standardNormalization.Rd
diff --git a/NAMESPACE b/NAMESPACE
@@ -1 +1,2 @@
+import(heatmap.plus)
 exportPattern("^[^\\.]") 
diff --git a/R/SNF.R b/R/SNF.R
diff --git a/R/affinityMatrix.R b/R/affinityMatrix.R
diff --git a/R/calNMI.R b/R/calNMI.R
diff --git a/R/chiDist2.R b/R/chiDist2.R
diff --git a/R/concordanceNetworkNMI.R b/R/concordanceNetworkNMI.R
diff --git a/R/displayClusters.r b/R/displayClusters.r
diff --git a/R/displayClustersWithHeatmap.R b/R/displayClustersWithHeatmap.R
@@ -0,0 +1,36 @@
+## Display a similarity matrix as a heatmap with the samples ordered by group.
+##
+## Arguments:
+## W: Similarity matrix (square: one row and one column per sample).
+## group: a vector containing the group of each sample in W, such as the
+##   result of the spectralClustering function. The order must correspond to
+##   the sample order in W.
+## ColSideColors: (optional) either a character vector with one color name per
+##   column of W, drawn as a single annotation bar by the heatmap function, OR
+##   a character matrix with one row per sample of W, in which every column is
+##   drawn as one annotation bar by the heatmap.plus function.
+## ...: other parameters passed on to the heatmap function (if ColSideColors is
+##   NULL or a vector) or to the heatmap.plus function (if ColSideColors is a
+##   matrix).
+##
+## Details:
+## For display purposes the diagonal of W is set to the median value of W, the
+## matrix is row-normalised and W = W + t(W) is applied.
+## No clustering is performed here: the samples are only reordered according
+## to their label in the group argument.
+##
+## Value:
+## Called for its plot; the value of the underlying heatmap / heatmap.plus call
+## is returned.
+##
+## Author: Florence Cavalli (as listed in man/displayClustersWithHeatmap.Rd)
+
+displayClustersWithHeatmap <- function (W, group, ColSideColors = NULL, ...) {
+  ## Row-normalise: every row of X is divided by its own sum.
+  normalize <- function(X) X / rowSums(X)
+
+  ## Sample order that puts members of the same group next to each other.
+  ## Ties keep their original order; samples whose group is NA are left out.
+  ind <- order(as.vector(group), na.last = NA)
+
+  ## Replace the diagonal by the median of W before normalising.
+  diag(W) <- median(as.vector(W))
+  W <- normalize(W)
+  ## Symmetrise again, since row-normalisation breaks the symmetry.
+  W <- W + t(W)
+  W <- W[ind, ind]
+
+  if (is.null(ColSideColors)) {
+    heatmap(W, scale = "none", Rowv = NA, Colv = NA, ...)
+  } else if (is.null(dim(ColSideColors))) {
+    ## Plain vector of colors: a single annotation bar.
+    heatmap(W, scale = "none", Rowv = NA, Colv = NA,
+            ColSideColors = ColSideColors[ind], ...)
+  } else {
+    ## drop = FALSE keeps a one-column color matrix a matrix, which is the
+    ## form heatmap.plus expects for ColSideColors.
+    heatmap.plus(W, scale = "none", Rowv = NA, Colv = NA,
+                 ColSideColors = ColSideColors[ind, , drop = FALSE], ...)
+  }
+}
diff --git a/R/dist2.R b/R/dist2.R
diff --git a/R/estimateNumberOfClustersGivenGraph.R b/R/estimateNumberOfClustersGivenGraph.R
diff --git a/R/getColorsForGroups.R b/R/getColorsForGroups.R
@@ -0,0 +1,14 @@
+## Convert a vector of group labels into a vector of colors.
+##
+## Arguments:
+## group: a numeric vector of group labels (1, 2, ...), such as the result of
+##   the spectralClustering function.
+## colors: the colors to use; colors[i] is assigned to the samples of group i.
+##   It must contain at least max(group) entries.
+##
+## Value:
+## A copy of group (same order, same attributes) in which every label i in
+## 1..max(group) is replaced by colors[i]. Entries that match no such label
+## (for instance NA) are not replaced by a color.
+## If there are fewer colors than max(group), a message is printed and NULL is
+## returned.
+getColorsForGroups <- function(group,colors=c("red","blue","green","purple","grey","cyan","brown","pink")){
+  ## Largest label actually present; NA labels are ignored and an empty input
+  ## counts as "no groups" instead of failing inside max().
+  known <- group[!is.na(group)]
+  n_groups <- if (length(known) > 0) max(known) else 0
+
+  if (n_groups > length(colors)) {
+    ## Kept as a printed message plus NULL so that existing callers testing
+    ## for NULL keep working.
+    print("ERROR: Not enough colors using the default color argument for the different groups, PLEASE inform the colors argument")
+    return(NULL)
+  }
+
+  ## Look up every label among 1..n_groups in one vectorised step; labels
+  ## outside that range (or NA) yield NA here and are left untouched.
+  cluster_colors <- group
+  idx <- match(group, seq_len(max(0, floor(n_groups))))
+  hit <- !is.na(idx)
+  cluster_colors[hit] <- colors[idx[hit]]
+  cluster_colors
+}
diff --git a/R/groupPredict.r b/R/groupPredict.r
diff --git a/R/internal.R b/R/internal.R
diff --git a/R/rankFeaturesByNMI.R b/R/rankFeaturesByNMI.R
@@ -0,0 +1,43 @@
+## Rank the features of every data type by how well each feature alone
+## reproduces the clustering of a target network.
+##
+## Arguments:
+## data: a list, where each item in the list is a matrix of values for one
+##   data type (samples in rows, features in columns)
+## W: the target network against which the NMI of each feature is calculated
+##
+## Details:
+## NMI is calculated between cluster assignments obtained by spectral
+## clustering. The number of clusters is the first value returned by
+## estimateNumberOfClustersGivenGraph on the target matrix, called with its
+## default parameters.
+##
+## Outputs:
+## A list that contains the NMI score of each feature and their ranks from
+## highest to lowest:
+## output[[1]] holds the NMI scores
+## output[[1]][[1]] holds the NMI scores of the first data type
+## output[[1]][[1]][1] is the NMI score of the first feature of the first data type
+## output[[2]] has the same layout but holds the ranks instead of the scores
+
+rankFeaturesByNMI <- function(data, W)
+{
+  ## A data.frame is also a list, but it is not a list of matrices.
+  stopifnot(is.list(data), !is.data.frame(data))
+
+  ## Reference clustering of the target network.
+  num_of_clusters_fused <- estimateNumberOfClustersGivenGraph(W)[[1]]
+  clustering_fused <- spectralClustering(W, num_of_clusters_fused)
+
+  NUM_OF_DATA_TYPES <- length(data)
+  NMI_scores <- vector(mode = "list", length = NUM_OF_DATA_TYPES)
+  NMI_ranks <- vector(mode = "list", length = NUM_OF_DATA_TYPES)
+
+  ## seq_len() keeps both loops empty for an empty list or a matrix without
+  ## columns, where 1:0 would iterate over c(1, 0).
+  for (data_type_ind in seq_len(NUM_OF_DATA_TYPES))
+  {
+    NUM_OF_FEATURES <- ncol(data[[data_type_ind]])
+    NMI_scores[[data_type_ind]] <- vector(mode = "numeric", length = NUM_OF_FEATURES)
+
+    for (feature_ind in seq_len(NUM_OF_FEATURES))
+    {
+      ## Cluster the samples on this single feature only, into the same
+      ## number of clusters as the reference clustering.
+      feature <- as.matrix(data[[data_type_ind]][, feature_ind])
+      affinity_matrix <- affinityMatrix(dist2(feature, feature))
+      clustering_single_feature <- spectralClustering(affinity_matrix, num_of_clusters_fused)
+      NMI_scores[[data_type_ind]][feature_ind] <- calNMI(clustering_fused, clustering_single_feature)
+    }
+
+    ## Negating the scores gives rank 1 to the highest NMI; ties are broken
+    ## by feature order.
+    NMI_ranks[[data_type_ind]] <- rank(-NMI_scores[[data_type_ind]], ties.method = "first")
+  }
+
+  return(list(NMI_scores, NMI_ranks))
+}
diff --git a/R/spectralClustering.r b/R/spectralClustering.r
diff --git a/R/standardNormalization.R b/R/standardNormalization.R
diff --git a/README b/README
diff --git a/data/Data1.rda b/data/Data1.rda
diff --git a/data/Data2.rda b/data/Data2.rda
diff --git a/data/dataL.rda b/data/dataL.rda
diff --git a/data/label.rda b/data/label.rda
diff --git a/man/Data1.Rd b/man/Data1.Rd
diff --git a/man/Data2.Rd b/man/Data2.Rd
diff --git a/man/SNF.Rd b/man/SNF.Rd
diff --git a/man/affinityMatrix.Rd b/man/affinityMatrix.Rd
diff --git a/man/calNMI.Rd b/man/calNMI.Rd
diff --git a/man/chiDist2.Rd b/man/chiDist2.Rd
diff --git a/man/concordanceNetworkNMI.Rd b/man/concordanceNetworkNMI.Rd
diff --git a/man/dataL.Rd b/man/dataL.Rd
diff --git a/man/displayClusters.Rd b/man/displayClusters.Rd
diff --git a/man/displayClustersWithHeatmap.Rd b/man/displayClustersWithHeatmap.Rd
@@ -0,0 +1,114 @@
+\name{displayClustersWithHeatmap}
+\alias{displayClustersWithHeatmap}
+\title{
+Display the similarity matrix by clusters with some sample information
+}
+\description{
+Visualize the clusters present in the given similarity matrix as well as some sample information.
+}
+\usage{
+displayClustersWithHeatmap(W, group, ColSideColors=NULL, ...)
+}
+\arguments{
+  \item{W}{
+Similarity matrix
+}
+  \item{group}{
+A numeric vector containing the groups information for each sample in W such as the result of the spectralClustering function. The order should correspond to the sample order in W.
+}
+  \item{ColSideColors}{
+(optional) Either a character vector of length ncol(W) containing the color names for a horizontal side bar that annotates the columns of W (drawn by the heatmap function), OR a character matrix with as many rows as W, in which each column is drawn as one annotation row, similar to heatmap()'s ColSideColors (drawn by the heatmap.plus function).
+}
+  \item{...}{
+Other parameters that can be passed on to the heatmap function (if ColSideColors is NULL or a vector) or to the heatmap.plus function (if ColSideColors is a matrix).
+}
+}
+\details{
+The similarity matrix is displayed with the heatmap function or, when ColSideColors is a matrix, with the heatmap.plus function.
+For display purposes, the diagonal of the similarity matrix is set to the median value of W, the matrix is row-normalised, and W = W + t(W) is applied.
+No clustering is performed for this display: the samples are simply ordered by the group label given in the group argument.
+}
+\value{
+Plots the similarity matrix using the heatmap function. Samples are ordered by the clusters provided in the group argument, and sample information is displayed as a color bar if the ColSideColors argument is provided.
+}
+\author{
+Florence Cavalli
+}
+\examples{
+## First, set all the parameters:
+K = 20;    # number of neighbors, usually (10~30)
+alpha = 0.5;    # hyperparameter, usually (0.3~0.8)
+T = 20;   # Number of Iterations, usually (10~20)
+
+## Data1 is of size n x d_1, 
+## where n is the number of patients, d_1 is the number of genes, 
+## Data2 is of size n x d_2, 
+## where n is the number of patients, d_2 is the number of methylation
+data(Data1)
+data(Data2)
+
+## Here, the simulation data (SNFdata) has two data types. They are complementary to each other. 
+## And two data types have the same number of points. 
+## The first half data belongs to the first cluster; the rest belongs to the second cluster.
+truelabel = c(matrix(1,100,1),matrix(2,100,1)); ## the ground truth of the simulated data
+
+## Calculate distance matrices
+## (here we calculate Euclidean Distance, you can use other distance, e.g,correlation)
+
+## If the data are all continuous values, we recommend the users to perform 
+## standard normalization before using SNF, 
+## though it is optional depending on the data the users want to use.  
+# Data1 = standardNormalization(Data1);
+# Data2 = standardNormalization(Data2);
+
+## Calculate the pair-wise distance; 
+## If the data is continuous, we recommend to use the function "dist2" as follows 
+Dist1 = dist2(as.matrix(Data1),as.matrix(Data1));
+Dist2 = dist2(as.matrix(Data2),as.matrix(Data2));
+
+## next, construct similarity graphs
+W1 = affinityMatrix(Dist1, K, alpha)
+W2 = affinityMatrix(Dist2, K, alpha)
+
+## next, we fuse all the graphs
+## then the overall matrix can be computed by similarity network fusion(SNF):
+W = SNF(list(W1,W2), K, T)
+
+## With this unified graph W of size n x n, 
+## you can do either spectral clustering or Kernel NMF. 
+## If you need help with further clustering, please let us know. 
+
+## You can display clusters in the data by the following function
+## where C is the number of clusters.
+C = 2   							# number of clusters
+group = spectralClustering(W,C); 	# the final subtypes information
+
+## Get a matrix containing the group information 
+## for the samples such as the SpectralClustering result and the True label
+M_label=cbind(group,truelabel)
+colnames(M_label)=c("spectralClustering","TrueLabel")
+
+## ****
+## Comments
+## rownames(M_label)=names(spectralClustering) could be added if the spectralClustering function
+## passed the sample IDs on as names,
+## or rownames(M_label)=rownames(W); having W with rownames and colnames
+## set to the sample IDs would help as well.
+## ***
+
+## Use the getColorsForGroups function to assign a color to each group
+## NB: if there are more than 8 groups, you will have to pass a vector
+## of colors to the getColorsForGroups function
+M_label_colors=t(apply(M_label,1,getColorsForGroups))
+## or choose your own colors for each label, for example:
+M_label_colors=cbind("spectralClustering"=getColorsForGroups(M_label[,"spectralClustering"],
+colors=c("blue","green")),"TrueLabel"=getColorsForGroups(M_label[,"TrueLabel"],
+colors=c("orange","cyan")))
+
+## Visualize the clusters present in the given similarity matrix 
+## as well as some sample information
+## No clustering is performed for this display: the samples
+## are ordered by the group label given in the group argument
+displayClustersWithHeatmap(W, group, M_label_colors[,"spectralClustering"]) 
+displayClustersWithHeatmap(W, group, M_label_colors)
+}
diff --git a/man/dist2.Rd b/man/dist2.Rd
diff --git a/man/estimateNumberOfClustersGivenGraph.Rd b/man/estimateNumberOfClustersGivenGraph.Rd
diff --git a/man/getColorsForGroups.Rd b/man/getColorsForGroups.Rd
@@ -0,0 +1,117 @@
+\name{getColorsForGroups}
+\alias{getColorsForGroups}
+\title{
+Obtain a vector of colors from a numeric vector of groups
+}
+\description{
+Convert a numeric vector containing group information to a vector of colors
+}
+\usage{
+getColorsForGroups(group, colors)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{group}{
+A numeric vector containing the groups information such as the result of the spectralClustering function.
+}
+  \item{colors}{
+A vector of colors to be used for the different groups. If the number of groups is > 8, the user has to supply the colors argument with a vector of colors whose length is at least equal to the number of groups.
+}
+}
+\details{
+Mainly used to construct a vector or a matrix of colors for the ColSideColors argument of the displayClustersWithHeatmap function. See the example of displayClustersWithHeatmap().
+}
+\value{
+A character vector of colors, corresponding to the given vector of group, keeping the same order.
+}
+\author{
+Florence Cavalli
+}
+\examples{
+## Example 1
+gp=c(rep(1,10),rep(2,4),rep(1,3),rep(3,6))
+## Using the default colors
+gp_colors=getColorsForGroups(gp)
+gp_colors
+## Specifying the colors
+gp_colors=getColorsForGroups(gp,colors=c("cyan","purple","orange"))
+gp_colors
+
+## Example 2: Part of SNF
+## First, set all the parameters:
+K = 20;    # number of neighbors, usually (10~30)
+alpha = 0.5;    # hyperparameter, usually (0.3~0.8)
+T = 20;   # Number of Iterations, usually (10~20)
+
+## Data1 is of size n x d_1, 
+## where n is the number of patients, d_1 is the number of genes, 
+## Data2 is of size n x d_2, 
+## where n is the number of patients, d_2 is the number of methylation
+data(Data1)
+data(Data2)
+
+## Here, the simulation data (SNFdata) has two data types. They are complementary to each other. 
+## And two data types have the same number of points. 
+## The first half data belongs to the first cluster; the rest belongs to the second cluster.
+truelabel = c(matrix(1,100,1),matrix(2,100,1)); ## the ground truth of the simulated data
+
+## Calculate distance matrices
+## (here we calculate Euclidean Distance, you can use other distance, e.g,correlation)
+
+## If the data are all continuous values, we recommend the users to perform 
+## standard normalization before using SNF, 
+## though it is optional depending on the data the users want to use.  
+# Data1 = standardNormalization(Data1);
+# Data2 = standardNormalization(Data2);
+
+## Calculate the pair-wise distance; 
+## If the data is continuous, we recommend to use the function "dist2" as follows 
+Dist1 = dist2(as.matrix(Data1),as.matrix(Data1));
+Dist2 = dist2(as.matrix(Data2),as.matrix(Data2));
+
+## next, construct similarity graphs
+W1 = affinityMatrix(Dist1, K, alpha)
+W2 = affinityMatrix(Dist2, K, alpha)
+
+## next, we fuse all the graphs
+## then the overall matrix can be computed by similarity network fusion(SNF):
+W = SNF(list(W1,W2), K, T)
+
+## With this unified graph W of size n x n, 
+## you can do either spectral clustering or Kernel NMF. 
+## If you need help with further clustering, please let us know. 
+
+## You can display clusters in the data by the following function
+## where C is the number of clusters.
+C = 2     						# number of clusters
+group = spectralClustering(W,C); 	# the final subtypes information
+
+## Get a matrix containing the group information 
+## for the samples such as the SpectralClustering result and the True label
+M_label=cbind(group,truelabel)
+colnames(M_label)=c("spectralClustering","TrueLabel")
+
+## ****
+## Comments
+## rownames(M_label)=names(spectralClustering) could be added if the spectralClustering function
+## passed the sample IDs on as names,
+## or rownames(M_label)=rownames(W); having W with rownames and colnames
+## set to the sample IDs would help as well.
+## ***
+
+## Use the getColorsForGroups function to assign a color to each group
+## NB: if there are more than 8 groups, you will have to pass a vector
+## of colors to the getColorsForGroups function
+M_label_colors=t(apply(M_label,1,getColorsForGroups))
+## or choose your own colors for each label, for example:
+M_label_colors=cbind("spectralClustering"=getColorsForGroups(M_label[,"spectralClustering"],
+colors=c("blue","green")),"TrueLabel"=getColorsForGroups(M_label[,"TrueLabel"],
+colors=c("orange","cyan")))
+
+## Visualize the clusters present in the given similarity matrix 
+## as well as some sample information
+## No clustering is performed for this display: the samples
+## are ordered by the group label given in the group argument
+displayClustersWithHeatmap(W, group, M_label_colors[,"spectralClustering"]) 
+displayClustersWithHeatmap(W, group, M_label_colors)
+}
diff --git a/man/groupPredict.Rd b/man/groupPredict.Rd
diff --git a/man/internal.Rd b/man/internal.Rd
diff --git a/man/label.Rd b/man/label.Rd