forked from hillhillll/richclub
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrichclub.py
638 lines (533 loc) · 23.5 KB
/
richclub.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
from numpy import where
def fast_RC(A, sequence=None):
from numpy import sum, argsort, arange
strengths = sum(A, axis=0)
if sequence is None:
sequence = argsort(strengths)
A = A[sequence][:,sequence]
x = [sum(A[n:,n:]) for n in arange(len(A))]
return x
class RC(object):
def __init__(self,g,scores=None,mode=2, ranks=None):
from numpy import argsort
self.g = g
if scores is None:
from numpy import array
self.scores = array(g.strength(weights='weight',mode=mode))
else:
self.scores = scores
self.order = argsort(self.scores)
self.n_nodes = g.vcount()
if ranks is not None:
self.ranks = ranks
from numpy import where
self.order = [where((self.ranks[r]<=self.scores) *
(self.scores<self.ranks[r+1]))[0] for r in range(len(self.ranks)-1)]
# from bisect import bisect_left
# self.tiers = [bisect_left(self.ranks, score) for score in self.scores]
# order = argsort(self.tiers)
# tier_breaks = concatenate(([0],where(diff(self.tiers))[0]+1))
# self.order = [order[tier_breaks[i]:tier_breaks[i+1]] for i in arange(len(tier_breaks)-1)]
def _ts(self, rank):
from numpy import where
s = where(self.scores<rank)[0]
self.scores = self.scores[self.scores>rank]
return s
def _rc(self, n):
try:
self.G.delete_vertices(map(str,n))
except TypeError:
self.G.delete_vertices(str(n))
return self.G
def phis(self,**kwargs):
f = lambda g: sum(g.es['weight'])
return self.fun(f,**kwargs)
def fun(self,f, ns=None):
if ns is None:
ns = self.n_nodes
self.G = self.g.copy()
self.G.vs['name'] = list(map(str,range(self.G.vcount())))
return [f(self._rc(n)) for n in self.order[:ns]]
def directed_spr(G, n_rewires=10, preserve='out', average_weight_by_node=False):
from numpy.random import randint
g = G.copy()
nes = len(g.es)
if not g.is_directed():
if 'weight' not in g.es.attributes():
#Just use igraph's built in rewiring.
g.rewire(n=n_rewires*nes)
return g
else:
raise ValueError("directed_spr currently does not support rewiring "
"of graphs that are both undirected and weighted. Have a good think "
"on the best ways to preserve the degree and strength sequence of an"
" undirected graph and get back to us with what you come up with.")
ValueError
if g.is_weighted() and average_weight_by_node:
from numpy import mean
for v in g.vs():
if preserve=='out':
edges = g.es.select(_source_in=[v.index])
elif preserve=='in':
edges = g.es.select(_target_in=[v.index])
edges["weight"] = mean(edges["weight"])
i = 0
rerolls = 0
while i < (n_rewires * nes):
if rerolls > n_rewires * nes:
print("Halted after %i failed rewirings and"\
"%i successful rewirings" % (rerolls, i))
return g
e1 = randint(nes)
e2 = randint(nes)
#In case we select the same edge twice, roll again.
if e1 == e2:
rerolls += 1
continue
s1 = g.es[e1].source
t1 = g.es[e1].target
a1 = g.es[e1].attributes()
s2 = g.es[e2].source
t2 = g.es[e2].target
a2 = g.es[e2].attributes()
#If s1 and s2 are the same, and we are preserving in strength, roll again.
#This is because rewiring wouldn't accomplish anything.
#The reverse is true if t1 and t2 are the same.
if (s1 == s2 and preserve == 'in') or (t1 == t2 and preserve == 'out'):
rerolls += 1
continue
#If either of the to-be-newly-wired connections already exist, roll again.
#This prevents multiple edges going in the same direction between two nodes.
if t2 in g.neighbors(s1, mode=1) or t1 in g.neighbors(s2, mode=1):
rerolls += 1
continue
g.delete_edges([e1, e2])
if preserve == 'out': # Rewire the outgoing connections
g.add_edge(s1, t2, **a1)
g.add_edge(s2, t1, **a2)
elif preserve == 'in': # Rewire the incoming connections
#Only difference is in the weight assignments
g.add_edge(s1, t2, **a2)
g.add_edge(s2, t1, **a1)
#Note that in the unweighted condition these two methods are equivalent, so either option
#for 'weighted' produces the same (correct) randomization, while preserving in degree
# and out degree for each node
i += 1
return g
def plot_rich_club(phis, ranks, ax=None, alpha=.1, **kwargs):
from numpy import shape
if len(shape(phis))>1:
multiple_samples = True
y = phis.mean(axis=0)
from numpy import std
error = std(phis, axis=0)
else:
multiple_samples = False
y = phis
error = 0
if not ax:
import matplotlib.pyplot as plt
plt.plot(ranks, y, **kwargs)
ax = plt.gca()
else:
ax.plot(ranks, y, **kwargs)
if multiple_samples:
ax.fill_between(ranks, y-error, y+error, alpha=alpha, **kwargs)
xlims = (min(ranks), max(ranks))
ax.set_xlim(xlims)
return ax
def threshold_score(scores, rank=90.0, mode='percentile', highest=True,
**kwargs):
if not highest and mode == 'percentile':
rank = 100 - rank
if mode == 'percentile':
from scipy.stats import scoreatpercentile
threshold_score = scoreatpercentile(scores, rank)
elif mode == 'raw':
threshold_score = rank
return threshold_score
def rich_nodes(graph, scores=None, highest=True, mode='percentile', **kwargs):
if scores is None:
scores = graph.degree()
ts = threshold_score(scores, highest=highest, mode=mode, **kwargs)
if highest:
return where(scores >= ts)[0]
else:
return where(scores <= ts)[0]
def richness_scores(graph, richness=None):
from types import FunctionType
if richness is None or richness == 'strength':
scores = graph.strength(graph.vs, mode=3, weights=graph.es["weight"])
elif richness == 'out_strength':
scores = graph.strength(graph.vs, mode=1, weights=graph.es["weight"])
elif richness == 'in_strength':
scores = graph.strength(graph.vs, mode=2, weights=graph.es["weight"])
elif richness=='degree':
scores = graph.degree()
elif type(richness)==FunctionType:
scores = richness(graph)
else:
try:
len(richness)
scores = richness
except TypeError:
raise ValueError("Unrecognized richness metric.")
return scores
def rich_club_coefficient(graph, richness=None,
club_property='C',
rank=None,
controls=None,
weightmax='max', candidate_edges_function=None,
directed_local_drawn_from='out_links',
mode='percentile',
**kwargs):
if not graph.is_weighted():
from numpy import ones
#If given an unweighted graph, "strength"="degree", so all edges
#have equal weight
graph = graph.copy()
graph.es["weight"] = ones(len(graph.es))
if weightmax=='max':
weightmax = max(graph.es["weight"])
scores = richness_scores(graph, richness=richness)
from types import FunctionType
if type(rank) == float:
rank = [rank]
if rank is None:
if mode=='percentile':
from numpy import arange
rank = arange(10.0, 100.0, 10.0)
elif mode=='raw':
from numpy import unique
rank = unique(scores)
from numpy import zeros
rc_coefficient = zeros(len(rank))
#return [sum(graph.subgraph(where(
# graph.strength(weights='weight',mode=2)>rank[i])[0]
# ).es['weight']) for i in range(len(rank))]
for i in range(len(rank)):
rich_node_indices = rich_nodes(
graph, rank=rank[i], scores=scores, mode=mode, **kwargs)
rich_subgraph = graph.subgraph(rich_node_indices)
if club_property==None or club_property=='C':
rc_coefficient[i] = sum(rich_subgraph.es["weight"])
elif type(club_property)==FunctionType:
rc_coefficient[i] = club_property(graph, rich_node_indices)
elif club_property.startswith('intensity'):
numerator = sum(rich_subgraph.es["weight"])
if numerator==0:
rc_coefficient[i] = 0
continue
if 'local' in club_property:
target_nodes = rich_node_indices
elif 'global' in club_property:
target_nodes = graph.vs
elif 'wm' in club_property:
target_nodes = rich_node_indices
else:
raise ValueError("Unrecognized club_property metric.")
if richness is None or richness == 'strength' or candidate_edges_function=='strength':
candidate_edges = graph.es.select(_between=(target_nodes, graph.vs))
elif richness == 'out_strength' or candidate_edges_function=='out_strength':
candidate_edges = graph.es.select(_source_in=target_nodes)
elif richness == 'in_strength' or candidate_edges_function=='in_strength':
candidate_edges = graph.es.select(_target_in=target_nodes)
elif candidate_edges_function:
candidate_edges = candidate_edges_function(graph)
else:
raise ValueError("Unrecognized richness metric for identifying "
"candidate edges to be used in calculating the denominator of "
"intensity properties. Supply a candidate_edges_function.")
if 'total' in club_property:
number_to_count = len(candidate_edges)
elif 'L' in club_property:
number_to_count = len(rich_subgraph.es)
elif 'P' in club_property:
n = len(rich_subgraph.vs)
number_to_count = n * (n - 1.0)
if not graph.is_directed():
number_to_count = number_to_count / 2.0
else:
raise ValueError("Unrecognized club_property metric.")
if 'wm' in club_property or 'weightmax' in club_property:
denominator = number_to_count * weightmax
# elif number_to_count > len(candidate_edges):
# print("Fewer links present in the network than are sought"
# " for with these settings. Using all available.")
# "Try using the 'L'"
# " setting instead.")
# from numpy import nan
# denominator = nan
elif 'local' in club_property:
#The local option includes a requirement that each rich node cannot
#contribute more links into the club than it can attach to all the
#other rich nodes.
#For directed networks, we must identify if the node is contributing
#out links or in links to the club.
n_nodes = len(graph.vs)
n_rich = len(rich_node_indices)
contribute_total = zeros(n_nodes)
denominator = 0
from numpy import argsort
weights = candidate_edges["weight"]
link_order = argsort(weights)
j = 0
limit = min(number_to_count, len(link_order))
while j < limit:
largest_link = int(link_order[j])
e = candidate_edges[largest_link]
if directed_local_drawn_from=='out_links':
home = e.source
elif directed_local_drawn_from=='in_links':
home = e.target
j += 1
if contribute_total[home] >= (n_rich-1):
continue
else:
denominator += e["weight"]
contribute_total[home] += 1
number_to_count -= 1
else:
from numpy import sort
candidate_edges = sort(candidate_edges["weight"])[::-1]
denominator = sum(candidate_edges[:number_to_count])
rc_coefficient[i] = numerator / denominator
elif club_property == 'n_components':
rc_coefficient[i] = len(rich_subgraph.components().subgraphs())
elif club_property == 'diameter_weighted':
rc_coefficient[i] = rich_subgraph.diameter(weights='weight')
elif club_property == 'diameter':
rc_coefficient[i] = rich_subgraph.diameter()
elif club_property == 'clustering_weighted':
rc_coefficient[i] = rich_subgraph.transitivity_avglocal_undirected(weights='weight')
elif club_property == 'clustering':
rc_coefficient[i] = rich_subgraph.transitivity_avglocal_undirected()
elif club_property == 'n_infomap':
infomap = rich_subgraph.community_infomap(edge_weights=rich_subgraph.es["weight"])
rc_coefficient[i] = infomap.cluster_graph().vcount()
elif club_property == 'q_infomap':
infomap = rich_subgraph.community_infomap(edge_weights=rich_subgraph.es["weight"])
rc_coefficient[i] = infomap.q
elif club_property == 'codelength':
infomap = rich_subgraph.community_infomap(edge_weights=rich_subgraph.es["weight"])
rc_coefficient[i] = infomap.codelength
else:
raise ValueError("Unrecognized club_property option.")
if controls is None:
return rc_coefficient
else:
from igraph import Graph
from numpy import ndarray
if ~(type(controls) == list or type(controls) == ndarray):
controls = list(controls)
control_rc_coefficient = zeros(len(rank))
for i in range(len(controls)):
control_graph = controls[i]
if type(control_graph) != Graph:
#If the graph isn't an igraph Graph, we'll turn it into a list, if it's
#not already one.
if type(control_graph) != list:
if type(control_graph)==ndarray:
control_graph = control_graph.tolist()
else:
from scipy.sparse import csc
if type(control_graph) == csc.csc_matrix:
control_graph - control_graph.toarray().tolist()
else:
raise TypeError("Can't parse the control"
"graph type.")
from numpy import transpose, all
if all(control_graph==transpose(control_graph)):
directed_mode = 1
else:
directed_mode = 0
control_graph = Graph.Weighted_Adjacency(control_graph,
mode=directed_mode)
control_rc_coefficient = control_rc_coefficient +\
rich_club_coefficient(
control_graph,
club_property=club_property,
rank=rank,
controls=None,
mode=mode,
weightmax=weightmax,
candidate_edges_function=candidate_edges_function,
directed_local_drawn_from=directed_local_drawn_from,
**kwargs)
control_rc_coefficient = control_rc_coefficient / len(controls)
return rc_coefficient / control_rc_coefficient
def normalized_rich_club_coefficient(graph, rewire=10, average=1, control=None,
preserve=None, rank=None, **kwargs):
if type(rank) == float:
rank = [rank]
if rank is None:
from numpy import arange
rank = arange(10.0, 100.0, 10.0)
rc_coefficient = rich_club_coefficient(graph, rank=rank, **kwargs)
from numpy import zeros
if control is None and not preserve:
if not graph.is_weighted():
#For unweighted graphs, preserving out strength vs in strength does
#the same thing, so we'll just assign something to use.
preserve = 'out'
elif kwargs['richness'] == 'out_strength':
preserve = 'out'
elif kwargs['richness'] == 'in_strength':
preserve = 'in'
elif kwargs['richness'] is None or kwargs['richness'] == 'strength':
#Will not assign preserve here, because if the graph is actually weighted
#the correct normalization is domain specific. ie. Up to the user!
raise ValueError("Must provide explicit control graphs or rewiring"
"preservation for this richness option.")
if control is not None:
from igraph import Graph
from numpy import ndarray
if ~(type(control) == list or type(control) == ndarray):
control = list(control)
control_rc_coefficient = zeros(len(rank))
for i in range(len(control)):
control_graph = control[i]
if type(control_graph) != list:
if type(control_graph)==ndarray:
control_graph = control_graph.tolist()
else:
from scipy.sparse import csc
if type(control_graph) == csc.csc_matrix:
control_graph - control_graph.toarray().tolist()
else:
raise TypeError("Can't parse the control graph type.")
from numpy import transpose, all
if all(control_graph==transpose(control_graph)):
mode = 1
else:
mode = 0
control_graph = Graph.Weighted_Adjacency(control_graph, mode=mode)
control_rc_coefficient = control_rc_coefficient +\
rich_club_coefficient(
control_graph, rank=rank, **kwargs)
control_rc_coefficient = control_rc_coefficient / len(control)
return rc_coefficient / control_rc_coefficient
elif rewire:
control_rc_coefficient = zeros(len(rank))
for i in range(average):
random_graph = directed_spr(
graph, n_rewires=rewire, preserve=preserve)
control_rc_coefficient = control_rc_coefficient +\
rich_club_coefficient(
random_graph, rank=rank, **kwargs)
control_rc_coefficient = control_rc_coefficient / average
return rc_coefficient / control_rc_coefficient
else:
raise ValueError("Must provide explicit control graphs if"
"rewiring option is deactivated.")
def preserve_strength(g, randomize_topology=False, preserve_mode='estimate_both', permute_strength=True, randomize_method='vl'):
from numpy import array, ndarray
from scipy.sparse import csc
if (type(g)==ndarray or type(g)==csc.csc_matrix) and preserve_mode in ['estimate_both', 3] and not randomize_topology:
A = g
from numpy import sum
out_strength = sum(A, axis=1)
in_strength = sum(A, axis=0)
adj = 1*(A>0)
out_degree = sum(adj, axis=1)
in_degree = sum(adj, axis=0)
if permute_strength:
from numpy.random import permutation
from numpy import unique
out_in_sequence = array(list(zip(out_degree, in_degree)))
for k in unique(out_in_sequence):
k_ind = where((out_in_sequence == k).all(axis=1))[0]
new_ind = permutation(k_ind)
out_strength[k_ind] = out_strength[new_ind]
in_strength[k_ind] = in_strength[new_ind]
from numpy import mean, outer, logical_not
mean_k = mean([out_degree,in_degree])
mean_s = mean([out_strength,in_strength])
G = (mean_k/mean_s) * outer(out_strength,in_strength) /outer(out_degree,in_degree)
G[logical_not(adj)] = 0
return G
out_degree = g.degree(mode=1)
in_degree = g.degree(mode=2)
if randomize_topology:
if g.is_directed():
#Check if all edges are bidirectional.
#If so, create a random graph with only bidirectional edges.
G = g.copy()
G.to_undirected(mode=False)
if all(array(G.count_multiple())==2):
G = g.Degree_Sequence(out_degree, method=randomize_method)
G.to_directed()
else:
G = g.copy()
G.rewire()
else:
G = g.Degree_Sequence(out_degree, method=randomize_method)
else:
G = g.copy()
if preserve_mode in ['estimate_both', 3]:
out_strength = array(g.strength(mode=1, weights='weight'))
in_strength = array(g.strength(mode=2, weights='weight'))
if permute_strength:
from numpy.random import permutation
from numpy import unique
out_in_sequence = array(list(zip(out_degree, in_degree)))
for k in unique(out_in_sequence):
k_ind = where((out_in_sequence == k).all(axis=1))[0]
new_ind = permutation(k_ind)
out_strength[k_ind] = out_strength[new_ind]
in_strength[k_ind] = in_strength[new_ind]
from numpy import mean
mean_k = mean([out_degree,in_degree])
mean_s = mean([out_strength,in_strength])
for e in G.es:
e["weight"] = ( (mean_k/mean_s) * out_strength[e.source] *
in_strength[e.target] /
(out_degree[e.source]*in_degree[e.target]) )
return G
elif preserve_mode in ['out', 1]:
preserve_mode = 1
elif preserve_mode in ['in', 2]:
preserve_mode = 2
from numpy import sum
ind = [g.incident(v,mode=preserve_mode) for v in range(g.vcount())]
weights = g.es[sum(ind)]['weight']
from numpy.random import shuffle
map(shuffle,ind)
G.es[sum(ind)]['weight'] = weights
return G
def just_plot_rc(rc, rc_controls, ax=None, x=None, top_limit=None, **kwargs):
from numpy import array, mean
from numpy.ma import masked_invalid
if x is None:
from numpy import arange
x = arange(len(rc))
if ax is None:
ax = gca()
from scipy.stats import scoreatpercentile
upper95 = array([scoreatpercentile(rc_controls[:,i], 97.5) for i in range(len(x))])
lower95 = array([scoreatpercentile(rc_controls[:,i], 2.5) for i in range(len(x))])
control_mean = mean(rc_controls,axis=0)
rc_norm = rc/control_mean
rc_norm = masked_invalid(rc_norm)
ax.plot(x, rc_norm, linewidth=1, color='k')
ax.plot(ax.get_xlim(), (1,1), linestyle='--', color='k')
ax.set_ylim(bottom=0)
if ax.get_ylim()[1]<2:
ax.set_ylim(top=2)
if top_limit:
ax.set_ylim(top=top_limit)
ax.set_ylabel(r"$RC_{norm}$")#+"\n (normalized rich club coefficient)")
ax.fill_between(x, 1,rc_norm, where=(rc>upper95) | (rc<lower95), **kwargs)
return ax
def graph_from_sparse(data, directed=None):
from igraph import Graph
sources, targets = data.nonzero()
if directed==None:
from numpy import all
directed = not all(data[sources, targets]==data[targets, sources])
from numpy import array
g = Graph(zip(sources, targets), directed=directed, edge_attrs={'weight': array(data[sources, targets])[0]})
if g.is_directed():
return g
else:
return g.simplify(combine_edges="first")