# CUE.impute.R
## Set the working directory
# setwd("/YOUR_DIRECTORY/CUE")
# Replace YOUR_DIRECTORY with the directory where you downloaded CUE.
# Remember the starting directory; it is restored before the results are saved.
root_dir <- getwd()

## Load the sample dataset
# `sample_data` is expected to be provided by the RData file loaded here.
load("PTSD/Sample_Dataset.RData")
X <- sample_data # input HM450 data
m <- ncol(X) # number of samples (columns of X)
## Load the required datasets.
## Each load() call restores the objects stored in the given file into the
## current environment; the object names are not visible in this script.
load("PTSD/Annotations.RData") # annotation data
load("Data/PTSD.neighbors.RData") # all neighbors
load("Data/Probes.RData") # names of all probes
## Load the datasets required for PTSD imputation
load("Data/PTSD_CpG_Best_method_list.RData") # best-method list
load("PTSD/RF/best_RF.RData") # neighbors for RF
load("PTSD/XGB/best_XGB.RData") # neighbors for XGB
load("PTSD/KNN/best_KNN.RData") # neighbors for KNN
# NOTE(review): this comment previously said "PFR" although the file is
# named best_TCR -- confirm that both names refer to the same method.
load("PTSD/TCR/best_TCR.RData") # neighbors for TCR
## Load all the required packages
library(randomForest)
# xgboost only needs to be attached once; a second library() call is a no-op.
# NOTE(review): if the repeated call was a placeholder for another package
# (for example one needed by the KNN or TCR steps), add that package here.
library(xgboost)
## Project helper code (contents are not visible from this script)
source("R/refund_lib.R")
## Load the CUE imputation function
source("R/impute.R")
## Validate the input
# Check whether the input X matches the requirements.
CUE_check(X)

## Build the inputs required by TCR
# TCR.input() returns a two-element list that is unpacked below.
temp <- TCR.input(X)
# Logit-transformed data, so that it better follows a Gaussian distribution.
X_logit <- temp[[1L]]
# Pre-calculated sample-specific density functions.
test.funcs <- temp[[2L]]
## Imputation
## On a single CPU this may take 4-5 days for a medium-sized methylation
## dataset (n = ~100 samples).
m.imputed <- CUE.impute(X, m, "PTSD")

## Save the imputed probes as y_impute.RData
# Return to the starting directory first so the output file is written there.
setwd(root_dir)
save(list = "m.imputed", file = "y_impute.RData")