-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathptrs.R
99 lines (77 loc) · 3.79 KB
/
ptrs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# -- masthead
masthead = paste("**********************************
Polytranscript risk scoring (PTRS)
Jonathan L. Hess, PhD
SUNY Upstate Medical University
Syracuse, NY, USA
e-mail: hessjo@upstate.edu
**********************************")
# print masthead
cat(masthead)
# --- load packages
suppressMessages(require(plyr))
suppressMessages(require(data.table))
# -- Load gene weights for a phentype and format columns
load_weights = function(file, gene_col, weight_col, p_col, sep="\t", header = T, calc_weight = FALSE, beta_col = '', se_col = ''){
# -- check parameters exist
if(is.null(file) == TRUE){stop("Weight file required!")}
if(is.null(gene_col) == TRUE){stop("Column denoting gene name required!")}
if(is.null(p_col) == TRUE){stop("Column denoting p-value required!")}
# -- load file
load_file = fread(file, header = header, sep=sep)
weight_file <<- load_file
if(calc_weight == FALSE){
if(is.null(weight_col) == TRUE){stop("Column denoting weight required!")}
# -- keep columns required for scoring
munge_file <<- load_file[,colnames(load_file) %in% c(gene_col, weight_col, p_col),with=FALSE]
# -- rename columns
names(munge_file)[names(munge_file) %in% gene_col] = "gene_id"
names(munge_file)[names(munge_file) %in% weight_col] = "weight"
names(munge_file)[names(munge_file) %in% p_col] = "pvalue"
}
if(calc_weight == TRUE){
if(beta_col == ""){stop("Warning! Column with beta coefficient required to calculate weight.")}
if(se_col == ""){stop("Warning! Column with standard error required to calculate weight.")}
load_file$weight = load_file[,colnames(load_file) %in% beta_col,with=FALSE]/load_file[,colnames(load_file) %in% se_col,with=FALSE]
munge_file <<- load_file[,colnames(load_file) %in% c(gene_col, "weight", p_col),with=FALSE]
# -- rename columns
names(munge_file)[names(munge_file) %in% gene_col] = "gene_id"
names(munge_file)[names(munge_file) %in% p_col] = "pvalue"
}
# -- object available to environment
return(munge_file[order(munge_file$pvalue, decreasing = FALSE), ])
}
# Function to calculate risk scores using PTRS algorithm
ptrs = function(dat = NULL, weight_table = NULL, p_thres = c(0.001, 0.01, 0.05, 0.1, 0.5, 1.0), scale_scores = TRUE){
# -- check existance of parameters
if(is.null(dat) == TRUE){stop("Data frame with gene expression data required!")}
if(is.null(weight_table) == FALSE) {
ptrs_weights = weight_table
} else {stop("Warning! A data frame with gene weights is required.")}
# -- format as data frame
dat = data.frame(dat)
# -- check that rownames match gene_id column in ptrs_weights
common_gene_id = intersect(rownames(dat), ptrs_weights$gene_id)
if(length(common_gene_id) < 1){stop("No common gene IDs were found! Please check that gene IDs are in same style between target data set and weights file.")}
# -- reorder dat according to munge_file
dat = dat[rownames(dat) %in% common_gene_id, ]
munge_file = ptrs_weights[ptrs_weights$gene_id %in% common_gene_id, ]
if(nrow(munge_file) != nrow(dat)){stop("Unexpected number of rows. Exiting.")}
dat = dat[match(munge_file$gene_id, rownames(dat)), ]
# -- score across thresholds
scores = list()
for(x in 1:length(p_thres)){
message("\rScoring threshold: ", p_thres[[x]])
weight_sub = munge_file[munge_file$pvalue <= p_thres[[x]], ]
dat_sub = dat[rownames(dat) %in% weight_sub$gene_id, ]
dat_sub = data.frame(t(dat_sub))
scoreMatrix = sweep(dat_sub, MARGIN = 2, STATS = weight_sub$weight, `*`)
if(scale_scores == TRUE){
scores[[x]] = scale(rowSums(scoreMatrix, na.rm=TRUE))} else {
scores[[x]] = rowSums(scoreMatrix, na.rm=TRUE)
}
}
score_df = data.frame(do.call(cbind, scores))
colnames(score_df) = paste("ptrs_",1:length(p_thres), sep="")
return(score_df)
}