-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompute_snoopdogg_number_bfs.py
51 lines (42 loc) · 2.09 KB
/
compute_snoopdogg_number_bfs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/python2
# This script computes every artist's Snoop Dogg Number their shortest path to Snoop Dogg by
# performing a breadth-first traversal and computing the results in a single pass of the vertices.
# This method can only be applied to the unweighted graph.
#
# This script runs in O(|E|) as far as I know. If that's true, I expect it to run in linear time
# for my purposes because the music collaboration graphs I'm working with are sparse and will never
# come close to being fully connected. This script took 20 seconds to run on an i7-6700k.
import psycopg2
import networkx as nx
# Connect to the MusicBrainz database and load graph from disk
connection = psycopg2.connect(database="musicbrainz", user="musicbrainz", password="", host="musicbrainz", port="5432")
cursor = connection.cursor()
graph = nx.read_gexf("graph/sdn-unweighted.gexf")
# Prepare the database
cursor.execute("DROP TABLE IF EXISTS snoopdogg_number_bfs;")
cursor.execute("""
CREATE TABLE snoopdogg_number_bfs (
artist TEXT NOT NULL,
distance INTEGER NOT NULL,
path TEXT NOT NULL,
PRIMARY KEY(artist)
);
""")
# Initialize dictionary with the Snoop Dogg as the base case
# TODO: Create class for storing artists' SDN and path.
sdn = {"Snoop Dogg" : (0, ["Snoop Dogg"])}
# Traverse the graph breadth-first and compute every artist's Snoop Dogg Number in O(V + E)
for edge in nx.bfs_edges(graph, "Snoop Dogg"):
parent = edge[0]
child = edge[1]
dist_to_snoopdogg = sdn[parent][0] + 1
path_to_snoopdogg = sdn[parent][1] + [child]
sdn[child] = (dist_to_snoopdogg, path_to_snoopdogg)
# Insert the data via one long query - this is an order of magnitude faster than one query per row
data_string = ','.join(cursor.mogrify('(%s,%s,%s)', (artist, sdn[artist][0], sdn[artist][1])) for artist in sdn) # mogrify requires python2
cursor.execute('INSERT INTO snoopdogg_number_bfs VALUES ' + data_string)
# TODO: Run query that adds all the artists from "nodes" table that have no path to Snoop Dogg.
# Apply all changes to the database
connection.commit()
connection.close()
print("Done!")