-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPredictive Modelling Using Lasso.R
109 lines (82 loc) · 2.94 KB
/
Predictive Modelling Using Lasso.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
phy_std <- readRDS("/media/edwin/Transcend/datasets/dada2/meta_analysis/network/NET/Mf_processed_phyloseq.RDS")
setwd("/media/edwin/Transcend/datasets/dada2/meta_analysis/ml/new")
library(SIAMCAT)
data("feat_crc_zeller", package="SIAMCAT")
data("meta_crc_zeller", package="SIAMCAT")
taxa_names(phy_std) <- tax_table(phy_std)[,"Species"]
condition1 <- "Metaplasia"
condition2 <- "Cancer"
doc <- paste(condition1,condition2, sep = "_")
# c(condition1,condition2)
phy_siam <- subset_samples(phy_std, Conditions %in% c(condition1, condition2))
###convert to relative abundance
physeq_rel_abund = phyloseq::transform_sample_counts(phy_siam, function(x){x / sum(x)})
# phyloseq::otu_table(phy_siam)[1:15, 1:15]
# phyloseq::otu_table(physeq_rel_abund)[1:15, 1:15]
label <- create.label(meta=sample_data(physeq_rel_abund), label="Conditions", case = condition2)
siamcat <- siamcat(phyloseq=physeq_rel_abund, label=label)
sc.obj <- filter.features(siamcat, cutoff=1e-04,
filter.method = 'abundance')
## Features successfully filtered
sc.obj <- filter.features(sc.obj, cutoff=0.05,
filter.method='prevalence',
feature.type = 'filtered')
## Features successfully filtered
sc.obj <- check.associations(sc.obj, detect.lim = 1e-06,
alpha=0.001,
max.show = 50,
plot.type = 'quantile.rect',
mult.corr = 'fdr',
sort.by = 'fc',
panels = c("fc", "prevalence", "auroc"),
verbose = 3,
fn.plot = paste("association_",doc,".pdf", sep = ""))
# sc.obj <- check.confounders(
# sc.obj,
# fn.plot = 'confounder_HG_plots.pdf',
# meta.in = NULL,
# feature.type = 'filtered'
# )
sc.obj <- normalize.features(
sc.obj,
norm.method = "log.unit",
norm.param = list(
log.n0 = 1e-06,
n.p = 2,
norm.margin = 1
)
)
sc.obj <- create.data.split(
sc.obj,
num.folds = 10,
num.resample = 20
)
Model_trained <- train.model(
sc.obj,
method = "randomForest",
param.set = 300
)
# get information about the model type
model_type(Model_trained)
## [1] "lasso"
# access the models
models <- models(Model_trained)
models[[1]]
## Model for learner.id=classif.cvglmnet; learner.class=classif.cvglmnet
## Trained on: task.id = data; obs = 112; features = 207
## Hyperparameters: nlambda=100,alpha=1
?make.predictions
sc.obj_try <- make.predictions(Model_trained, sc.obj, normalize.holdout=TRUE)
pred_matrix <- pred_matrix(sc.obj)
head(pred_matrix)
model_list(sc.obj_try) <- model_list(Model_trained)
sc.obj <- evaluate.predictions(sc.obj_try)
## Evaluated predictions successfully.
model.evaluation.plot(sc.obj)
model.interpretation.plot(
sc.obj,
fn.plot = paste("intepretation_",doc,".pdf", sep = ""),
consens.thres = 0.5,
limits = c(-3, 3),
heatmap.type = 'zscore',
)