-
Notifications
You must be signed in to change notification settings - Fork 0
/
graph.py
51 lines (42 loc) · 1.33 KB
/
graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import pickle
import re
class Graph(object):
    """Directed adjacency-list graph of page-to-page references.

    ``data`` maps a node (a page number when populated via ``build``) to the
    list of edges recorded for it, in insertion order. Duplicate edges are
    kept. A node whose text contained no references is stored with the
    single edge ``None`` so that it is still present in the graph.
    """

    # Added to every page number parsed out of text.
    # NOTE(review): presumably the gap between the page numbers printed in
    # the document and the keys used in the page hashmap (e.g. front
    # matter) -- confirm against the caller that builds the hashmap.
    DEFAULT_PAGE_OFFSET = 22

    def __init__(self):
        """Create an empty graph."""
        self.data = {}

    def add_edge(self, node, edge):
        """Append ``edge`` to ``node``'s edge list, creating the node if new."""
        self.data.setdefault(node, []).append(edge)

    def has_node(self, node):
        """Return True if ``node`` has an entry in the graph."""
        return node in self.data

    def search_references(self, pages):
        """Return ``{node: edges}`` for each node in ``pages`` that exists.

        Nodes that are not in the graph are silently skipped. The returned
        lists are the graph's own edge lists, not copies, so mutating them
        mutates the graph.
        """
        return {node: self.data[node] for node in pages if self.has_node(node)}

    @staticmethod
    def get_references(text, offset=DEFAULT_PAGE_OFFSET):
        """Extract the page numbers referenced in ``text``.

        A reference is the lowercase word ``page`` followed by whitespace
        and one or more digits (the match is case-sensitive).

        Args:
            text: String to scan.
            offset: Integer added to every parsed page number. Defaults to
                ``DEFAULT_PAGE_OFFSET`` (22).

        Returns:
            List of ``int(number) + offset`` in order of appearance;
            empty when nothing matches.
        """
        return [int(number) + offset
                for number in re.findall(r'page\s+(\d+)', text)]

    def build(self, hashmap, offset=DEFAULT_PAGE_OFFSET):
        """Populate the graph from a ``{page_number: text}`` mapping.

        Each page gets one edge per reference found in its text. A page
        without any reference gets the single edge ``None`` so it is still
        registered as a node.

        Args:
            hashmap: Mapping of page number to that page's text.
            offset: Forwarded to ``get_references``.

        Returns:
            ``self``, so calls can be chained.
        """
        for page_number, text in hashmap.items():
            references = self.get_references(text, offset)
            # An empty result falls back to [None] to keep the page as a node.
            for ref in references or [None]:
                self.add_edge(page_number, ref)
        return self

    def serialize(self, file_path):
        """Pickle this graph to ``file_path``, overwriting any existing file."""
        with open(file_path, 'wb') as file:
            pickle.dump(self, file)

    @staticmethod
    def deserialize(file_path):
        """Load and return the object pickled at ``file_path``.

        SECURITY: unpickling can execute arbitrary code. Only call this on
        files produced by ``serialize`` from a trusted source. The loaded
        object is returned as-is; its type is not checked.
        """
        with open(file_path, 'rb') as file:
            return pickle.load(file)