-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbcr_graphs.R
159 lines (137 loc) · 5.56 KB
/
bcr_graphs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# Julian Q. Zhou
# https://github.com/julianqz
#' Create BCR network for B cell clones using igraph
#'
#' @param db A `data.frame` containing BCR data.
#' @param col_clone Column name containing clone IDs.
#' @param col_seq Column name for sequences used for computing edges.
#' @param threshold Hamming distance threshold used for computing edges.
#' @param nt_or_aa Whether to use nucleotide or aa distances. Either `nt` or `aa`.
#' @param bool_norm Whether to normalize distance by length of sequence. Boolean.
#' @param bool_mst Whether to convert graph into minimum spanning tree. Boolean.
#'
#' @return An igraph graph object.
#'
#' @details This function solely handles graph building and does not
#' handle aesthetics.
#'
get_bcr_network = function(db, col_clone, col_seq,
threshold, nt_or_aa=c("nt", "aa"),
bool_norm=TRUE, bool_mst=TRUE) {
suppressPackageStartupMessages(require(igraph))
suppressPackageStartupMessages(require(alakazam))
# related functions
# ?graph_from_adj_list
# ?make_empty_graph
# ?add_edges
# ?vertex_attr_names
# checks
stopifnot(nrow(db)>0)
stopifnot(all( c(col_clone, col_seq) %in% colnames(db) ))
stopifnot(threshold>=0)
stopifnot(nt_or_aa %in% c("nt", "aa"))
if (bool_norm) { stopifnot(threshold<=1) }
# initialize an undirected graph with nrow(db) vertices with no edges
g = make_empty_graph(n=nrow(db), directed=F)
# add edges
# loop thru clones
# draw edge if (normalized) hamming distance (nt/aa) btw col_seq (eg. CDR3)
# is within (<=) threshold
cat("Calculating edges based on",
ifelse(bool_norm, "normalized", ""),
nt_or_aa, "Hamming distance\nbetween `", col_seq,
"` of clonal members, using a threshold of", threshold, "...\n")
for (cl in unique(db[[col_clone]])) {
# wrt db
# these are the same integer IDs of vertices
idx = which(db[[col_clone]]==cl)
# if more than 1 clonal member
if (length(idx)>1) {
# distance matrix
if (nt_or_aa=="nt") {
mtx_ham = pairwiseDist(db[[col_seq]][idx],
dist_mat=alakazam::getDNAMatrix())
} else {
mtx_ham = pairwiseDist(db[[col_seq]][idx],
dist_mat=alakazam::getAAMatrix())
}
# normalize by length?
if (bool_norm) {
mtx_ham = mtx_ham / nchar(db[[col_seq]][idx][1])
}
# binarize
mtx_ham_bin = mtx_ham <= threshold
# initialize list holder for edges
lst_edges = vector(mode="list",
length=sum(mtx_ham_bin[upper.tri(mtx_ham_bin, diag=F)]))
i_edge = 1
for (i_row in 1:length(idx)) {
for (i_col in 1:length(idx)) {
if (i_col > i_row) {
#cat(i_row, i_col, "\n")
if (mtx_ham_bin[i_row, i_col]) {
lst_edges[[i_edge]] = c(idx[i_row], idx[i_col])
i_edge = i_edge + 1
}
}
}
}
# +1 because after adding last edge, i_edge still undergoes +1
stopifnot( i_edge == length(lst_edges)+1 )
# add edges to g
g = add_edges(graph=g, edges=unlist(lst_edges))
rm(lst_edges, mtx_ham, mtx_ham_bin)
}
rm(idx)
}
# minimum spanning tree?
if (bool_mst) {
cat("Converting graph to minimum spanning tree...\n")
g = mst(graph=g)
}
cat("Done.\n")
# sanity check
stopifnot( length(V(g)) == nrow(db) )
return(g)
}
#' adapted from ?igraph::shapes (changed marked by #*)
#' generic star vertex shape, with a parameter for number of rays
#'
#' @exapmles
#' # no clipping, edges will be below the vertices anyway
#' add_shape("star", clip=shape_noclip,
#' plot=mystar,
#' parameters=list(vertex.norays=5, vertex.frame.color="transparent"))
#' g <- make_ring(length(shapes))
#' plot(g, vertex.shape="star", vertex.color=rainbow(vcount(g)),
#' vertex.size=seq(10,20,length=vcount(g)))
#' plot(g, vertex.shape="star", vertex.color=rainbow(vcount(g)),
#' vertex.size=seq(10,20,length=vcount(g)),
#' vertex.norays=rep(4:8, length=vcount(g)))
#'
mystar <- function(coords, v=NULL, params) {
vertex.color <- params("vertex", "color")
if (length(vertex.color) != 1 && !is.null(v)) {
vertex.color <- vertex.color[v]
}
vertex.size <- 1/200 * params("vertex", "size")
if (length(vertex.size) != 1 && !is.null(v)) {
vertex.size <- vertex.size[v]
}
norays <- params("vertex", "norays")
if (length(norays) != 1 && !is.null(v)) {
norays <- norays[v]
}
#* added by JZ
vertex.frame.color <- params("vertex", "frame.color")
if (length(vertex.frame.color) != 1 && !is.null(v)) {
vertex.frame.color <- vertex.frame.color[v]
}
#* JZ added vertex.frame.color and fg
mapply(coords[,1], coords[,2], vertex.color, vertex.size, norays, vertex.frame.color,
FUN=function(x, y, bg, size, nor, fg) {
symbols(x=x, y=y, bg=bg, fg=fg,
stars=matrix(c(size,size/2), nrow=1, ncol=nor*2),
add=TRUE, inches=FALSE)
})
}