.Rhistory

stat_function(
fun = function(x) {
(dnorm(x, mean = mean, sd = sd)) * n * binwidth * lambda
}
)
},
mean = my_mix[["mu"]], #mean
sd = my_mix[["sigma"]], #standard deviation
lambda = my_mix[["lambda"]], #amplitude
n = length(observations$value), #sample size
binwidth = 0.05 #binwidth used for histogram
)
observations$value
observations
x=observations$value
# Build one stat_function() layer per mixture component (presumably
# ggplot2::stat_function -- the package is not loaded in the visible history).
# Each layer draws a normal density scaled by n * binwidth * lambda.
tryhere <- mapply(
  FUN = function(mean, sd, lambda, n, binwidth) {
    stat_function(
      fun = function(x) {
        dnorm(x, mean = mean, sd = sd) * n * binwidth * lambda
      }
    )
  },
  mean = my_mix[["mu"]],            # mean
  sd = my_mix[["sigma"]],           # standard deviation
  lambda = my_mix[["lambda"]],      # amplitude
  n = length(observations$value),   # sample size
  binwidth = 0.05                   # binwidth used for histogram
)
tryhere
tryhere[[1]]
# Build one closure per mixture component. Each closure maps x to the normal
# density of that component scaled by n * binwidth * lambda. mapply() walks the
# mu / sigma / lambda vectors in parallel and recycles the scalar n and binwidth.
tryhere <- mapply(
  FUN = function(mean, sd, lambda, n, binwidth) {
    function(x) {
      dnorm(x, mean = mean, sd = sd) * n * binwidth * lambda
    }
  },
  mean = my_mix[["mu"]],            # mean
  sd = my_mix[["sigma"]],           # standard deviation
  lambda = my_mix[["lambda"]],      # amplitude
  n = length(observations$value),   # sample size
  binwidth = 0.05                   # binwidth used for histogram
)
rtyhere
tryhere
tryhere[[1]]
?mapply
n = length(observations$value)
binwidth = 0.05
dnorm(x=observations$value, mean = my_mix[["mu"]][1], sd = my_mix[["mu"]][1])) * n * binwidth * lambda
n = length(observations$value)
binwidth = 0.05
dnorm(x=observations$value, mean = my_mix[["mu"]][1], sd = my_mix[["mu"]][1]) * n * binwidth * lambda
lambda()
lambda
lambda = my_mix[["lambda"]]
dnorm(x=observations$value, mean = my_mix[["mu"]][1], sd = my_mix[["mu"]][1]) * n * binwidth * lambda
my_mix[["mu"]]
my_mix[["mu"]][1]
my_mix[["mu"]][1]
dnorm(x=observations$value, mean = my_mix[["mu"]][1], sd = my_mix[["sd"]][1]) * n * binwidth * lambda
dnorm(x=observations$value, mean = my_mix[["mu"]][1], sd = my_mix[["sigma"]][1])
dnorm(x=observations$value, mean = my_mix[["mu"]][1], sd = my_mix[["sigma"]][1]) * n * binwidth * lambda
n
binwidth
lambda
dnorm(x=observations$value, mean = my_mix[["mu"]][1], sd = my_mix[["sigma"]][1]) * n * binwidth * lambda[1]
range(dnorm(x=observations$value, mean = my_mix[["mu"]][1], sd = my_mix[["sigma"]][1]) * n * binwidth * lambda[1])
binwidth
dnorm(x=observations$value, mean = my_mix[["mu"]][1], sd = my_mix[["sigma"]][1]) * n * binwidth * lambda[1]
plot(dnorm(x=observations$value, mean = my_mix[["mu"]][1], sd = my_mix[["sigma"]][1]) * n * binwidth * lambda[1])
y=dnorm(x=observations$value, mean = my_mix[["mu"]][1], sd = my_mix[["sigma"]][1]) * n * binwidth * lambda[1]
x=observations$value
plot(x,y)
range(y)
max(y)
# Demo: find where two kernel density estimates cross.
# Both densities are evaluated on the same grid (from/to = plotrange), so their
# $y vectors can be compared element by element.
set.seed(1)
plotrange <- c(-1, 8)
d1 <- density(rchisq(1000, df = 2), from = plotrange[1], to = plotrange[2])
d2 <- density(rchisq(1000, df = 3) - 1, from = plotrange[1], to = plotrange[2])
plot(d1)
lines(d2)
# A crossing lies between grid points i and i + 1 whenever the sign of
# (d1 - d2) flips there; diff() of the 0/1 indicator is non-zero at those i.
poi <- which(diff(as.integer(d1$y > d2$y)) != 0L)
# Mark the crossings with a circle ...
points(d1$x[poi], d1$y[poi], col = "red")
# ... and with dashed guide lines.
abline(v = d1$x[poi], col = "orange", lty = 2)
abline(h = d1$y[poi], col = "orange", lty = 2)
poi <- which(diff(d1$y > d2$y) != 0)
poi
diff(d1$y > d2$y)
diff(d1$y > d2$y)
d1$y
diff(d1$y > d2$y)
d1$y > d2$y
m1=3.74
m2=11.84
std1=1.8
std2=2.92
# Constant term of the quadratic a*x^2 + b*x + c = 0 whose roots are the points
# where the densities N(m1, std1) and N(m2, std2) are equal.
#
# NOTE: this version only ever hands back the constant term, and it does so
# invisibly (nothing is printed at the console). The quadratic and linear
# coefficients are not part of the result.
#
# m1, m2     means of the two normal distributions
# std1, std2 standard deviations of the two normal distributions
solve <- function(m1, m2, std1, std2) {
  const_term <- m1^2 / (2 * std1^2) - m2^2 / (2 * std2^2) - log(std2 / std1)
  invisible(const_term)
}
solve(m1,m2,std1,std2 = )
solve(m1,m2,std1,std2 )
# Coefficients of the quadratic a*x^2 + b*x + c = 0 whose roots are the points
# where the densities N(m1, std1) and N(m2, std2) are equal.
#
# m1, m2     means of the two normal distributions
# std1, std2 standard deviations of the two normal distributions
#
# Returns a numeric vector c(a, b, c).
solve <- function(m1, m2, std1, std2) {
  quad_coef <- 1 / (2 * std1^2) - 1 / (2 * std2^2)
  lin_coef <- m2 / (std2^2) - m1 / (std1^2)
  const_coef <- m1^2 / (2 * std1^2) - m2^2 / (2 * std2^2) - log(std2 / std1)
  # return() accepts a single value; return(a, b, c) stops with
  # "multi-argument returns are not permitted", so the three coefficients are
  # packed into one vector.
  c(quad_coef, lin_coef, const_coef)
}
solve(m1,m2,std1,std2 )
solve(m1,m2,std1,std2)
# Coefficients (quadratic, linear, constant) of the equation
# a*x^2 + b*x + c = 0 that is satisfied where the densities N(m1, std1) and
# N(m2, std2) are equal.
#
# m1, m2     means of the two normal distributions
# std1, std2 standard deviations of the two normal distributions
#
# Returns an unnamed numeric vector of length 3.
solve <- function(m1, m2, std1, std2) {
  c(
    1 / (2 * std1^2) - 1 / (2 * std2^2),
    m2 / (std2^2) - m1 / (std1^2),
    m1^2 / (2 * std1^2) - m2^2 / (2 * std2^2) - log(std2 / std1)
  )
}
solve(m1,m2,std1,std2)
# Points where two weighted normal densities are equal, i.e. the x solving
#   p1 * dnorm(x, m1, sd1) == p2 * dnorm(x, m2, sd2).
# Taking logs turns this into quad*x^2 + lin*x + const = 0.
#
# m1, sd1, p1  mean, standard deviation and weight of the first component
# m2, sd2, p2  mean, standard deviation and weight of the second component
#
# Returns one value when the standard deviations are equal (the equation is
# linear), otherwise both roots of the quadratic ("+" root first).
intersect <- function(m1, sd1, m2, sd2, p1, p2) {
  lin <- (m1 / sd1^2 - m2 / sd2^2)
  quad <- 0.5 * (1 / sd2^2 - 1 / sd1^2)
  const <- 0.5 * (m2^2 / sd2^2 - m1^2 / sd1^2) - log((sd1 / sd2) * (p2 / p1))
  if (quad == 0) {
    # equal variances: the x^2 term vanishes, a single crossing remains
    return(-const / lin)
  }
  discriminant <- lin^2 - 4 * quad * const
  (-lin + c(1, -1) * sqrt(discriminant)) / (2 * quad)
}
m1=0; sd1=2; m2=2.5; sd2=2; p1=.8; p2=.2
(is=intersect(m1,sd1,m2,sd2,p1,p2))
xs = seq(-6, 6, by=.01)
plot(xs, p1*dnorm(xs, m1, sd1), type= 'l')
lines(xs, .2*dnorm(xs, m2,sd2))
abline(v=is)
pp = sim_pois(300)
low_expr = c(10, 10)
high_expr = c(20, 50)
pp
low_expr
high_expr
library('devtools')
devtools::install_github('edsgard/trendsceek')
source("http://www.bioconductor.org/biocLite.R")
deps = c('BiocParallel', 'genefilter', 'DESeq2')
new_deps = deps[!(deps %in% installed.packages()[,"Package"])]
if(length(new_deps) != 0){biocLite(new_deps)}
source("http://www.bioconductor.org/biocLite.R")
deps = c('BiocParallel', 'genefilter', 'DESeq2')
new_deps = deps[!(deps %in% installed.packages()[,"Package"])]
if(length(new_deps) != 0){biocLite(new_deps)}
install.packages("clusmca")
install.packages("cluspca")
8*5
(8*5)/4
(8*5)*4
25/160
5/40
5/40*100
790/40
340*4
1000/8000
install.packages("clustvarsel")
library(clustvarsel)
?clustvarsel
# Simulate n observations from a two-component bivariate Gaussian mixture
# (columns 1:2 carry the cluster structure) plus three further columns that are
# generated without reference to the component labels.
# library() stops with an error if MASS is missing; require() would only
# return FALSE and let the script fail later at the first mvrnorm() call.
library(MASS)
n <- 200
pro <- 0.5
mu1 <- c(0, 0)
mu2 <- c(3, 3)
sigma1 <- matrix(c(1, 0.5, 0.5, 1), 2, 2, byrow = TRUE)
sigma2 <- matrix(c(1.5, -0.7, -0.7, 1.5), 2, 2, byrow = TRUE)
X <- matrix(0, n, 5)
colnames(X) <- paste0("X", seq_len(ncol(X)))
# generate the clustering variables
u <- runif(n)
in_first <- u < pro  # component membership, computed once and reused below
Class <- ifelse(in_first, 1, 2)
X[in_first, 1:2] <- mvrnorm(sum(in_first), mu = mu1, Sigma = sigma1)
X[!in_first, 1:2] <- mvrnorm(sum(!in_first), mu = mu2, Sigma = sigma2)
# generate the non-clustering variables
X[, 3] <- X[, 1] + rnorm(n)
X[, 4] <- rnorm(n, mean = 1.5, sd = 2)
X[, 5] <- rnorm(n, mean = 2, sd = 1)
# plot the data
out <- clustvarsel(X, G = 1:5)
out <- clustvarsel(X, G = 1:5,itermax=50)
install.packages("vscc")
library(vscc)
?vscc
require("mclust")
data(banknote) #Load data
head(banknote[,-1]) #Show preview of full data set
bankrun <- vscc(banknote[,-1])
str(bankrun)
bankrun$topselected
head(banknote)
head(bankrun$topselected)
cor(c(1,2,3),c(3,2,1))
cor(c(1,2,3),c(1,2,3))
cor(c(1,2,3),c(3,1,2))
cor(c(1,2,3),c(3,1,2),method="spearman")
library(M3C)
c(0,1,0,1,0,1,0,1,0,1,0,1)
c(0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,)
status<-c(0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1,)
status<-c(0,1,0,1,0,1,0,1,0,1,0,1,1,1,1,1,1,1,1)
status
sample(status,replacement=T)
sample(status,replace=T)
sample(status,replace=T)
sample(status,replace=T)
sample(status,replace=T)
sample(status,replace=T)
sample(status,replace=T)
table(status)
number_samples_negative_class<-table(dataset3_boot$outcome)[1]
number_samples_negative_class
number_samples_negative_class
number_samples_negative_class<-status(number_samples_negative_class)
number_samples_negative_class<-table(status)[1]
number_samples_negative_class
status
which(number_samples_negative_class==1)
which(number_samples_negative_class%in%1)
which(status %in% 1)
sample(which(status %in% 1),number_samples_negative_class)
library(heatmaply)
?hheatmaply
?heatmaply
# x <- heatmapr(mtcars)
library(heatmaply)
heatmaply(iris[,-5], k_row = 3, k_col = 2)
heatmaply(iris[,-5], k_row = 3, k_col = 2)
# x <- heatmapr(mtcars)
library(heatmaply)
heatmaply(iris[,-5], k_row = 3, k_col = 2)
plotly::heatmaply(iris[,-5], k_row = 3, k_col = 2)
library(heatmaply)
?hheatmaply
?heatmaply
library(heatmaply)
heatmaply(iris[,-5], k_row = 3, k_col = 2)
library(heatmaply)
heatmaply(iris[,-5])
pdf("test.pdf")
library(heatmaply)
heatmaply(iris[,-5], k_row = 3, k_col = 2)
x11()
library(heatmaply)
heatmaply(iris[,-5], k_row = 3, k_col = 2)
sessionInfo()
raw <- read.csv("https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv")
str(raw)
y <- c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
zones<-c("q0","q0", "q0","q0","q0","q0","q0", "q0","q0","q0","q1","q1","q1","q1","q1","q1","q1","q1","q1","q1","q2","q2","q2","q2","q2","q2","q2","q2","q2","q2","q3","q3","q3","q3","q3","q3","q3","q3","q3","q3","q4","q4","q4","q4","q4","q4","q4","q4","q4","q4","q5","q5","q5","q5","q5","q5","q5","q5","q5","q5","q0","q0", "q0","q0","q0","q0","q0", "q0","q0","q0","q1","q1","q1","q1","q1","q1","q1","q1","q1","q1","q2","q2","q2","q2","q2","q2","q2","q2","q2","q2","q3","q3","q3","q3","q3","q3","q3","q3","q3","q3","q4","q4","q4","q4","q4","q4","q4","q4","q4","q4","q5","q5","q5","q5","q5","q5","q5","q5","q5","q5")
ratings <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)
heart_rate <- c(76.42, 76.4, 76.4, 76.43, 76.5, 76.55, 76.62, 76.67, 76.72, 76.73, 95.27, 95.55, 95.87, 96.15, 96.43, 96.73, 97, 97.17, 97.33, 97.33, 107.52, 107.55, 107.57, 107.62, 107.67, 107.78, 107.92, 108.08, 108.25, 108.35, 76.42, 76.4, 76.4, 76.43, 76.5, 76.55, 76.62, 76.67, 76.72, 76.73, 108.52, 108.55, 108.57, 108.62, 108.67, 108.78, 108.92, 108.08, 108.25, 108.35, 75.42, 75.4, 75.4, 75.43, 75.5, 75.55, 75.62, 75.67, 75.72, 75.73, 78.42, 78.4, 78.4, 78.43, 78.5, 78.55, 78.62, 78.67, 78.72, 78.73, 98.27, 99.55, 99.87, 99.15, 99.43, 99.73, 99, 99.17, 99.33, 99.33, 109.52, 109.55, 109.57, 109.62, 109.67, 109.78, 109.92, 109.08, 109.25, 109.35, 96.42, 96.4, 96.4, 96.43, 96.5, 96.55, 96.62, 96.67, 96.72, 96.73, 107.52, 107.55, 107.57, 108.62, 109.67, 110.78, 110.92, 110.08, 110.25, 110.35, 85.42, 85.4, 85.4, 85.43, 85.5, 85.55, 85.62, 85.67, 85.72, 85.73)
participant <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)
# Step 0.1: create the data frame.
# data_set is kept as the original cbind() result. Because zones is a character
# vector, cbind() coerces every column of that matrix to character.
data_set <- cbind(y, zones, ratings, heart_rate, participant)
columns <- c("fatigue", "zones", "ratings", "heart_rate", "participant")
# Build train_set straight from the vectors so each column keeps its own type
# (numeric fatigue / ratings / heart_rate / participant, character zones)
# instead of inheriting the all-character columns of data_set.
train_set <- data.frame(
  y, zones, ratings, heart_rate, participant,
  stringsAsFactors = FALSE
)
colnames(train_set) <- columns
str(train_set)
# The column is spelled "participant"; train_set$partecipants is NULL.
train_set$participant
library(shiny); source('Desktop/GMIEC-shiny-master/run_GMIEC.R')
source('Desktop/GMIEC-shiny-master/run_GMIEC.R')
library(shiny)
setwd("/Users/guidantonio/Desktop/github/GMIEC-shiny")
options(shiny.fullstacktrace=TRUE)
runApp(launch.browser=TRUE)
list_genes_dataset<-read.delim(file="Gene_List_DNA_Repair_pathways_Young_et_al2016.txt",header=T)
setwd("/Users/guidantonio/Desktop/gmt/example_data_gmiec_paper2")
input_GE2<-read.delim("tab_exp.txt",sep="\t",stringsAsFactors=F,check.names = F)
input_CNV2<-read.delim("tab_cna.txt",stringsAsFactors=F,check.names = F)
input_METH2<-read.delim("tab_meth.txt",stringsAsFactors=F,check.names = F)
input_MUTATION2<-read.delim("tab_mut_deleterious.txt",stringsAsFactors=F,check.names = F)
input_clinical2<-read.delim("tab_clinical.txt",stringsAsFactors=F,check.names = F)
drugs_for_analysis2<-read.delim("interactions_cancer.txt",stringsAsFactors=F,check.names = F)
colnames(drugs_for_analysis2)<-c("genes","drugs")
list_genes_dataset<-read.delim(file="Gene_List_DNA_Repair_pathways_Young_et_al2016.txt",header=T)
all_genes_for_analysis<-list_genes_dataset[,1]
input_GE_selected<-input_GE2[input_GE2[,1]%in%all_genes_for_analysis,]
input_CNV_selected<-input_CNV2[input_CNV2[,1]%in%all_genes_for_analysis,]
input_METH_selected<-input_METH2[input_METH2[,1]%in%all_genes_for_analysis,]
input_MUTATION_selected<-input_MUTATION2[input_MUTATION2[,1]%in%all_genes_for_analysis,]
check_ge_for_patients=input_GE_selected
input_CNV_selected=input_CNV_selected
input_METH_selected=input_METH_selected
input_MUTATION_selected=input_MUTATION_selected
tabDrugs=drugs_for_analysis2
input_clinical=input_clinical2
parameter_discr=c("2;1;0.7")
clusters=3
genes_annotated_TF_fv=FALSE
dim(input_MUTATION_selected)
input_MUTATION_selected
print(dim(input_MUTATION_selected))
#check columns of the genes
colnames(check_ge_for_patients)[1]<-"genesID"
colnames(input_CNV_selected)[1]<-"genesID"
colnames(input_METH_selected)[1]<-"genesID"
colnames(input_MUTATION_selected)[1]<-"genesID"
colnames(tabDrugs)[1]<-"genes"
#change id clinical data
colnames(input_clinical)[1]<-"SAMPLE_ID"
automatic_clusters=TRUE
annotate_clin=TRUE
library(ComplexHeatmap)
print(dim(input_MUTATION_selected))
#compute z-scores for gene expression data: log2(x + 1), then standardise each
#row (one row per gene, one column per patient) to mean 0 and sd 1
check_ge_for_patients[,-1]<-log(check_ge_for_patients[,-1]+1,2)
#apply() over rows (MARGIN = 1) returns one COLUMN per input row, i.e. a
#patients x genes matrix. It has to be transposed back to genes x patients
#before it is written into the data frame; without t() the values end up in
#the wrong cells (or the assignment fails on the dimension mismatch).
check_ge_for_patients[,-1]<-t(apply(X=check_ge_for_patients[,-1],1,FUN=function(X){(X-mean(X))/sd(X)}))
#check columns of the genes
colnames(check_ge_for_patients)[1]<-"genesID"
colnames(input_CNV_selected)[1]<-"genesID"
colnames(input_METH_selected)[1]<-"genesID"
colnames(input_MUTATION_selected)[1]<-"genesID"
colnames(tabDrugs)[1:2]<-c("genes","drug_primary_name")
if(annotate_clin==TRUE){
colnames(input_clinical)[1]<-"sample_id"
}
# Collect the patient identifiers of every omics table and build the list of
# unique patients that the per-patient loop iterates over.
# For expression / CNV / methylation the patients are the column names after
# dropping the first column (gene identifiers).
pts_exp_ge<-colnames(check_ge_for_patients[-1]) #the first column is the gene name
pts_exp_cnv<-colnames(input_CNV_selected[-1])
pts_meth_cnv<-colnames(input_METH_selected[-1])
# NOTE(review): [,2] extracts a single column as a plain vector, and colnames()
# of a vector is NULL, so pts_mut contributes no patients to the list below.
# If column 2 holds the tumor sample barcodes (as the comment suggests), the
# intent was probably unique(input_MUTATION_selected[,2]) -- confirm before
# changing, because patients present only in the mutation table would then be
# looked up as columns of the other tables.
pts_mut<-colnames(input_MUTATION_selected[,2])#tumor sample barcode: mandatory column
print("Do")
list_pts_experiments<-list(ge=pts_exp_ge,cnv=pts_exp_cnv,meth=pts_meth_cnv,mut=pts_mut)
print("Step1: Run the analysis")
#ALL_samples_UNIQUE<-common_patient_GE_CNV_METH_MUT
# union (not intersection) of the patient identifiers found in the tables above
ALL_samples_UNIQUE<-unique(as.character(unlist(list_pts_experiments)))
incProgress(0.15, detail = "Step1: Find common patients between all datasets")
print("Step2: Get the data for each patient")
incProgress(0.15, detail = "Step2: Get the data for each patient")
RES_ENGINE<-list()
for(asu in 1:length(ALL_samples_UNIQUE)){
se_patient_selection<-ALL_samples_UNIQUE[asu]
print("Step3: Extract the data from the patient")
print(se_patient_selection)
GE_current_patient<-check_ge_for_patients[,c("genesID",se_patient_selection)]
CNV_current_patient<-input_CNV_selected[,c("genesID",se_patient_selection)]
METH_current_patient<-input_METH_selected[,c("genesID",se_patient_selection)]
#it is possible that 1 patient does not have a gene
# Select the mutation rows of the current patient (column 2 is compared with
# the patient identifier).
# isTRUE() is TRUE only for a single TRUE value; applied to the whole
# comparison vector it is FALSE as soon as the table has more than one row, so
# every patient used to fall through to the "no mutations" placeholder.
# which() keeps the matching rows and ignores NA comparisons.
rows_mut_current_patient<-which(input_MUTATION_selected[,2]==se_patient_selection)
if(length(rows_mut_current_patient)>0){
  MUT_current_patient<-input_MUTATION_selected[rows_mut_current_patient,]
} else {
  print("i do not have a patient with mutations")
  # placeholder: a one-row matrix with the same columns as the mutation table,
  # a dummy gene name in the first cell and 0 in every other cell
  dfparse<-matrix(nrow=1,ncol=ncol(input_MUTATION_selected))
  dfparse[1]<-"NA" #false gene
  dfparse[2:ncol(input_MUTATION_selected)]<-as.numeric(0)
  colnames(dfparse)<-colnames(input_MUTATION_selected)
  MUT_current_patient<-dfparse
}
list_DF<-c("GE_current_patient","CNV_current_patient","METH_current_patient","MUT_current_patient")
#check the presence of empty data.frame
res_nrow<-NULL
for(dfe in list_DF){
current_df<-get(dfe)
res_nrow<-c(res_nrow,dim(current_df)[1])
}
#check2: control which samples does not have genes
nogenesindataset<-which(res_nrow==0)
if(length(nogenesindataset)==0){
list_DF_clean<-list_DF
DF_notpresent<-"AlldataAvailable"
} else{
list_DF_clean<-list_DF[-nogenesindataset]
DF_notpresent<-list_DF[nogenesindataset]
}
list_df_patients<-list()
for(ldfc in 1:length(list_DF_clean)){
list_df_patients[[ldfc]]<-get(list_DF_clean[ldfc])
}
###
### merge the different experiments for the same patient
###
merge_experiment_patient_df<-Reduce(function(...) merge(...,by="genesID",all=T),list_df_patients)
##
## Start to parse the object merge_experiment_patient_df
##
#find the index of columns with the string of patients: Select Data of Copy Number. -> Mutation does not have a column with the name of sample
index_mepd<-grep(colnames(merge_experiment_patient_df),pattern=se_patient_selection)
#create a new data.frame with hugo symbol and entrez and the values of experiments
dfPatientForAnalysis<-cbind(merge_experiment_patient_df[,1],merge_experiment_patient_df[index_mepd],DF_notpresent=rep(0,nrow(merge_experiment_patient_df)))
colnames(dfPatientForAnalysis)<-c("genesID",list_DF_clean)
#Change the last column, the experiment without data is always in the last column, see previously line of code
colnames(dfPatientForAnalysis)[ncol(dfPatientForAnalysis)]<-DF_notpresent
###
### just for test! dfPatientForAnalysis can be used for other analysis not Aprior.
###
print("Step4: ")
#remove NA values
dfPatientForAnalysis[is.na(dfPatientForAnalysis)]<-0
rownames(dfPatientForAnalysis)<-paste(dfPatientForAnalysis[,1],seq(1:nrow(dfPatientForAnalysis)))
##
##apply the rules
##
#create a data.frame with the update data
###
### Step1: Identify if the gene expression, cnv, methylation of TFs predicted expression of genes of interest
###
parameter_discr_unlist<-as.numeric(unlist(strsplit(parameter_discr,split=";")))
ge_d<-parameter_discr_unlist[1]
cnv_d<-parameter_discr_unlist[2]
meth_d<-parameter_discr_unlist[3]
results_for_tf2<-rules_notfor_tf(dfPatientForAnalysis=dfPatientForAnalysis,se_patient_selection=se_patient_selection,ge_d=ge_d,cnv_d=cnv_d,meth_d=meth_d,MUT_current_patient=MUT_current_patient)
dfPatientForAnalysis_GAC<-results_for_tf2[[1]] #this a data.frame with the values of omics experiments and binary rules
col_relTF<-results_for_tf2[[2]]
dfPatientForAnalysis_GAC_rel_TF<-dfPatientForAnalysis_GAC[,col_relTF]
rownames(dfPatientForAnalysis_GAC_rel_TF)<-dfPatientForAnalysis_GAC_rel_TF[,1]
#sanity check: properties (columns) whose values sum to 0, i.e. that are always equal to 0, can be removed
#this step is important to reduce the computational cost.
resSumControl<-apply(dfPatientForAnalysis_GAC_rel_TF[,2:ncol(dfPatientForAnalysis_GAC_rel_TF)],2,sum)
names.good.properties<-names(resSumControl[which(resSumControl!=0)])
input_for_klar<-cbind(genesID=dfPatientForAnalysis_GAC_rel_TF[,1],dfPatientForAnalysis_GAC_rel_TF[,names.good.properties])
input_for_klar2 <- data.frame(sapply(input_for_klar,as.factor))
###
### run the engine for the analysis
###
print("Step6: Analysis")
mergeGAC_COM_res_K_2<-engine_all_dataset(input_for_klar2,dfPatientForAnalysis_GAC=dfPatientForAnalysis_GAC,clusters)
print(colnames(mergeGAC_COM_res_K_2))
#search in which rows are present the genes in the table of drugs-genes interactions
#two columns in the tables of database must be present: genes and drug_primary_name
indexSubDrug<-which(tabDrugs$genes%in%mergeGAC_COM_res_K_2[,1])
subtabDrugs<-tabDrugs[indexSubDrug,]
#I use the symbol "#" to concatenate the drug names: a drug name can itself contain "," (e.g. 1-2ethyl,diol,benze),
#so joining and later splitting on "," would give a wrong count of drugs per gene
collapseDrugTable<-aggregate(drug_primary_name ~ genes, data = subtabDrugs, paste,collapse = "#")
mergeGAC_COM_res_K_2_drugs<-merge(mergeGAC_COM_res_K_2,collapseDrugTable,by.x="genesID",by.y="genes",all.x=T)
# Number of distinct drug names in a "#"-separated string.
# x  character value(s) such as "drugA#drugB#drugA"
# Returns the count as a double.
countDrugsFunc <- function(x) {
  drug_names <- unlist(strsplit(x, split = "#"))
  # the same drug can be listed more than once for a gene (different source
  # databases report the same drug), so only distinct names are counted
  distinct_names <- unique(drug_names)
  return(as.numeric(length(distinct_names)))
}
resCountDrugs<-as.numeric(sapply(mergeGAC_COM_res_K_2_drugs$drug_primary_name,countDrugsFunc))
resCountDrugs[which(is.na(mergeGAC_COM_res_K_2_drugs$drug_primary_name))]<-0
#test the druggability of a modules
mergeGAC_COM_res_K_2_drugs<-cbind(mergeGAC_COM_res_K_2_drugs,Count_Drugs_For_Gene=resCountDrugs)
print(colnames(mergeGAC_COM_res_K_2_drugs))
### estimate the druggability of the modules
# incProgress(0.15, detail = "Step4: Compute the scores")
TOTAL_score_module<-NULL
TOTAL_score_module_drugs<-NULL
mergeGAC_COM_res_K_2_drugs$clusters<-as.numeric(mergeGAC_COM_res_K_2_drugs$clusters)
print(colnames(mergeGAC_COM_res_K_2_drugs))
mergeGAC_COM_res_K_2_drugs<-mergeGAC_COM_res_K_2_drugs[order(mergeGAC_COM_res_K_2_drugs$clusters),]
print(colnames(mergeGAC_COM_res_K_2_drugs))
uniqGA<-as.numeric(unique(mergeGAC_COM_res_K_2_drugs$clusters))
for(mga in uniqGA){
print(mga)
smgcrkl<- mergeGAC_COM_res_K_2_drugs[mergeGAC_COM_res_K_2_drugs$clusters==mga,]
# $ genesID                  : Factor w/ 22569 levels "1-Dec","1-Mar",..: 22 72 108 152 242 284 293 294 315 368 ...
# $ FC_GE_TF                 : num  1 1 1 1 1 1 1 1 1 1 ...
# $ Genes_overexpressed      : num  0 0 0 0 0 0 0 0 0 0 ...
# $ Genes_underexpressed     : num  0 0 0 0 0 0 0 0 0 0 ...
#check the number of modules alterated: the first column is hugo symbol, the second is the fold-change between the expression (is not an alteration)
#of tf and target genes. I remove these columns because are not useful in the categorization of alterated and not alterated genes
# col_relTF
# [1] "genesID"                   "FC_GE_TF"                  "Genes_overexpressed"
# [4] "Genes_underexpressed"      "CNV_EC_gain"               "CNV_EC_depletion"
# [7] "CNV_gain"                  "CNV_depletion"             "CNV_TF_categorization_TF"
# [10] "METH_EC_hyper"             "METH_EC_hypo"              "METH_hyper"
# [13] "METH_hypo"                 "METH_TF_categorization_TF" "MUT_genes"
# [16] "MUT_TF"
alterated_genes_in_module<-apply(smgcrkl[,col_relTF][,-c(1:2)],1,FUN=function(x){ifelse(sum(x)==0,"Not_altered","Alterated")})
#count the number of not alterated genes in modules
check_alteration<-cbind(smgcrkl,alterated_genes_in_module)
nrow_module<-nrow(smgcrkl[,col_relTF][,-c(1:2)])
ncol_module<-ncol(smgcrkl[,col_relTF][,-c(1:2)])
#total size module
total_cell<-nrow_module*ncol_module
number_not_alteration_modules<-length(which(smgcrkl[,col_relTF][,-c(1:2)]==0))
number_alteration_modules<-length(which(smgcrkl[,col_relTF][,-c(1:2)]==1))
#estimate LNAM
ratio_na_inmodule_with_totsize<-number_not_alteration_modules/total_cell
#estimate LAM
ratio_alt_inmodule_with_totsize<-number_alteration_modules/total_cell
#estimate DELTA-A
#estimate the difference between the number of cell without alteration and with alteration in modules
#positive values indicate that the modules is again integrate otherwise not. [range-1,1]
#deltamodulesinalt<-ratio_na_inmodule_with_totsize-ratio_alt_inmodule_with_totsize
deltamodulesinalt<-ratio_alt_inmodule_with_totsize
scorestatusmodule<-as.numeric(rep(deltamodulesinalt,nrow_module))
TOTAL_score_module<-c(TOTAL_score_module,scorestatusmodule)
#estimate rdg
genes_with_drugs<-length(which(smgcrkl[,"Count_Drugs_For_Gene"]>=1))/nrow_module
#estimate NRDG
genes_without_drugs<-length(which(smgcrkl[,"Count_Drugs_For_Gene"]==0))/nrow_module
#estimate DELTA-D
#deltamodulegenesdrugs<-genes_with_drugs-genes_without_drugs
deltamodulegenesdrugs<-genes_with_drugs
deltamodulegenesdrugs <-rep(deltamodulegenesdrugs,nrow_module)
TOTAL_score_module_drugs<-c(TOTAL_score_module_drugs,deltamodulegenesdrugs)
}
#estimate SAD
mergeGAC_COM_res_K_2_drugs <-cbind(cbind(cbind(mergeGAC_COM_res_K_2_drugs,scorescorestatusmodule=TOTAL_score_module),TOTAL_score_module_drugs),combinedscore=TOTAL_score_module_drugs*TOTAL_score_module)
# > colnames(mergeGAC_COM_res_K_2_drugs)
# [1] "genesID"                   "FC_GE_TF"                  "Genes_overexpressed"
# [4] "Genes_underexpressed"      "CNV_depletion"
# scorescorestatusmodule
# TOTAL_score_module_drugs
# combinedscore
RES_ENGINE[[asu]]<-mergeGAC_COM_res_K_2_drugs
}
library(shiny)
setwd("/Users/guidantonio/Desktop/github/GMIEC-shiny")
options(shiny.fullstacktrace=TRUE)
runApp(launch.browser=TRUE)