-
Notifications
You must be signed in to change notification settings - Fork 2
/
pageRank.py
42 lines (34 loc) · 1.46 KB
/
pageRank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import operator
import math, random, sys, csv
def pagerank(graph, damping=0.85, epsilon=1.0e-8):
inlink_map = {}
outlink_counts = {}
def new_node(node):
if node not in inlink_map: inlink_map[node] = set()
if node not in outlink_counts: outlink_counts[node] = 0
for tail_node, head_node in graph:
new_node(tail_node)
new_node(head_node)
if tail_node == head_node: continue
if tail_node not in inlink_map[head_node]:
inlink_map[head_node].add(tail_node)
outlink_counts[tail_node] += 1
all_nodes = set(inlink_map.keys())
for node, outlink_count in outlink_counts.items():
if outlink_count == 0:
outlink_counts[node] = len(all_nodes)
for l_node in all_nodes: inlink_map[l_node].add(node)
initial_value = 1 / len(all_nodes)
ranks = {}
for node in inlink_map.keys(): ranks[node] = initial_value
new_ranks = {}
delta = 1.0
n_iterations = 0
while delta > epsilon:
new_ranks = {}
for node, inlinks in inlink_map.items():
new_ranks[node] = ((1 - damping) / len(all_nodes)) + (damping * sum(ranks[inlink] / outlink_counts[inlink] for inlink in inlinks))
delta = sum(abs(new_ranks[node] - ranks[node]) for node in new_ranks.keys())
ranks, new_ranks = new_ranks, ranks
n_iterations += 1
return ranks, n_iterations