-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcascade_generator.py
executable file
·236 lines (194 loc) · 6.98 KB
/
cascade_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
import random
import math
import numpy as np
from copy import copy
from graph_tool import Graph, GraphView, PropertyMap
from graph_tool.topology import shortest_distance, label_components
from graph_tool.search import bfs_search
from helpers import infected_nodes, timeout, sampling_weights_by_order
from graph_helpers import BFSNodeCollector, reverse_bfs
MAXINT = np.iinfo(np.int32).max
class CascadeTooSmall(Exception):
pass
def observe_cascade(c, source, q, method='uniform',
tree=None, source_includable=False):
"""
given a cascade `c` and `source`,
return a list of observed nodes according to probability `q`
"""
all_infection = np.nonzero(c != -1)[0]
if not source_includable:
all_infection = list(set(all_infection) - {source})
num_obs = int(math.ceil(len(all_infection) * q))
if num_obs < 2:
num_obs = 2
if method == 'uniform':
return np.random.permutation(all_infection)[:num_obs]
elif method == 'late':
return np.argsort(c)[-num_obs:]
elif method == 'leaves':
assert tree is not None, 'to get the leaves, the cascade tree is required'
# extract_steiner_tree(tree, )
nodes_in_order = reverse_bfs(tree)
return nodes_in_order[:num_obs]
elif method == 'bfs-head':
assert tree is not None, 'the cascade tree is required'
vis = BFSNodeCollector()
bfs_search(GraphView(tree, directed=False), source, vis)
sampling_weights_by_order
vis.nodes_in_order
return vis.nodes_in_order[:num_obs] # head
elif method == 'bfs-tail':
assert tree is not None, 'the cascade tree is required'
vis = BFSNodeCollector()
bfs_search(GraphView(tree, directed=False), source, vis)
return vis.nodes_in_order[-num_obs:] # tail
else:
raise ValueError('unknown method {}'.format(method))
@timeout(5)
def si(g, p, source=None, stop_fraction=0.5):
"""
g: the graph
p: edge-wise infection probability
stop_fraction: stopping if more than N x stop_fraction nodes are infected
"""
weighted = False
if isinstance(p, PropertyMap):
weighted = True
else:
# is float and uniform
assert 0 < p and p <= 1
if source is None:
source = random.choice(np.arange(g.num_vertices()))
infected = {source}
infection_times = np.ones(g.num_vertices()) * -1
infection_times[source] = 0
time = 0
edges = []
stop = False
infected_nodes_until_t = copy(infected)
while True:
infected_nodes_until_t = copy(infected)
# print('current cascade size: {}'.format(len(infected_nodes_until_t)))
time += 1
for i in infected_nodes_until_t:
vi = g.vertex(i)
for e in vi.all_edges():
if weighted:
inf_proba = p[e]
else:
inf_proba = p
vj = e.target()
j = int(vj)
rand = random.random()
# print('rand=', rand)
# print('inf_proba=', inf_proba)
# print('{} infected?'.format(j), j not in infected)
if j not in infected and rand <= inf_proba:
# print('SUCCESS')
infected.add(j)
infection_times[j] = time
edges.append((i, j))
# stop when enough nodes have been infected
if (len(infected) / g.num_vertices()) >= stop_fraction:
stop = True
break
if stop:
break
if stop:
break
tree = Graph(directed=True)
for _ in range(g.num_vertices()):
tree.add_vertex()
vertex_nodes = set()
for u, v in edges:
tree.add_edge(u, v)
vertex_nodes.add(u)
vertex_nodes.add(v)
vfilt = tree.new_vertex_property('bool')
vfilt.set_value(False)
vfilt.a[list(vertex_nodes)] = True
tree.set_vertex_filter(vfilt)
return source, infection_times, tree
def sample_graph_by_p(g, p):
"""
for IC model
graph_tool version of sampling a graph
mask the edge according to probability p and return the masked graph
g: the graph
p: float or np.array
"""
if isinstance(p, PropertyMap):
p = p.a
flags = (np.random.random(p.shape) <= p)
p = g.new_edge_property('bool')
p.set_2d_array(flags)
return GraphView(g, efilt=p)
def get_infection_time(g, source, return_edges=False):
"""for IC model
"""
time, pred_map = shortest_distance(g, source=source, pred_map=True)
time = np.array(time.a)
time[time == MAXINT] = -1
if return_edges:
edges = []
reached = infected_nodes(time)
for v in reached:
# print(v)
if pred_map[v] >= 0 and pred_map[v] != v:
edges.append((pred_map[v], v))
return time, edges
else:
return time
def ic(g, p, source=None,
stop_fraction=1.0,
return_tree_edges=False):
"""
graph_tool version of simulating cascade
return np.ndarray on vertices as the infection time in cascade
uninfected node has dist -1
stop_fraction: detemines how large the snapshot is.
"""
if source is None:
source = random.choice(np.arange(g.num_vertices(), dtype=int))
gv = sample_graph_by_p(g, p)
times = get_infection_time(gv, source, return_edges=False)
size = len(infected_nodes(times))
min_size = int(stop_fraction * g.num_vertices())
if size < min_size:
# size does not fit, early stopping to save time
raise CascadeTooSmall('{} < {}'.format(size, min_size))
stuff = get_infection_time(gv, source, return_edges=return_tree_edges)
if not return_tree_edges:
times = stuff
tree_edges = None
else:
times, tree_edges = stuff
# truncate the infection to fit size
times[times == -1] = (times.max() + 1)
uninfected = times.argsort()[min_size:]
times[uninfected] = -1
if tree_edges is not None:
inf_nodes = set(infected_nodes(times))
tree_edges = [e for e in tree_edges if e[0] in inf_nodes and e[1] in inf_nodes]
return source, times, tree_edges
def incremental_simulation(g, c, p, num_nodes, return_new_edges=False):
"""incrementally add edges to given cascade
num_nodes is passed bacause vfilt might be passed
"""
# print('incremental_simulation -> g', g)
gv = sample_graph_by_p(g, p)
new_infected_nodes = set(infected_nodes(c))
comp = label_components(gv)[0]
covered_cids = set()
for v in infected_nodes(c):
cid = comp[v]
if cid not in covered_cids:
new_infected_nodes |= set((comp.a == cid).nonzero()[0])
covered_cids.add(cid)
new_c = np.ones(g.num_vertices()) * (-1)
new_c[list(new_infected_nodes)] = 1
if return_new_edges:
raise Exception("`return_new_edges` not supported anymore")
else:
return new_c