-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path3.2 Clustering (Krukal MST using UFS).py
101 lines (71 loc) · 2.88 KB
/
3.2 Clustering (Krukal MST using UFS).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
"""
1.Question 1
In this programming problem and the next you'll code up the clustering algorithm from lecture for computing a max-spacing k-clustering.
This file (clustering1.txt) describes a distance function (equivalently, a complete graph with edge costs). It has the following format:
[number_of_nodes]
[edge 1 node 1] [edge 1 node 2] [edge 1 cost]
[edge 2 node 1] [edge 2 node 2] [edge 2 cost]
...
There is one edge (i,j)(i,j) for each choice of 1≤i<j≤n, where nn is the number of nodes.
For example, the third line of the file is "1 3 5250", indicating that the distance between nodes 1 and 3 (equivalently, the cost of the edge (1,3)) is 5250. You can assume that distances are positive, but you should NOT assume that they are distinct.
Your task in this problem is to run the clustering algorithm from lecture on this data set, where the target number kk of clusters is set to 4. What is the maximum spacing of a 4-clustering?
ADVICE: If you're not getting the correct answer, try debugging your algorithm using some small test cases. And then post them to the discussion forum!
"""
class Node(object):
def __init__(self, value):
self.nodeID = value
self.connections = []
class Edge(object):
def __init__(self, node1ID, node2ID, cost):
self.nodes = (node1ID, node2ID)
self.cost = cost
class UFS(object):
def __init__(self, length):
self.p = [i for i in range(length)]
self.rank = [0 for _ in range(length)]
def find(self, x):
if self.p[x] != x:
self.p[x] = self.find(self.p[x])
return self.p[x]
def union(self, x, y):
rx, ry = self.find(x), self.find(y)
if self.rank[rx] < self.rank[ry]:
self.p[rx] = ry
elif self.rank[rx] > self.rank[ry]:
self.p[ry] = rx
else:
self.p[rx] = ry
self.rank[ry] += 1
def dataReader(filePath):
with open(filePath) as f:
data = f.readlines()
numNodes = int(data[0])
nodes = [Node(i) for i in range(numNodes + 1)]
edges = []
for index in range(1, len(data)):
node1ID, node2ID, cost = list(map(int, data[index].split()))
nodes[node1ID].connections.append((node2ID, cost))
nodes[node2ID].connections.append((node1ID, cost))
edges.append(Edge(node1ID, node2ID, cost))
return numNodes, nodes, edges
def clustering(numClusters, numNodes, nodes, edges):
ufs = UFS(numNodes + 1)
findRes = False
edges.sort(key=lambda item:item.cost)
for edge in edges:
node1ID, node2ID = edge.nodes
if ufs.find(node1ID) != ufs.find(node2ID):
if not findRes:
ufs.union(node1ID, node2ID)
numNodes -= 1
else:
return edge.cost
if numNodes == numClusters:
findRes = True
def main():
filePath = "data/clustering1.txt"
numNodes, nodes, edges = dataReader(filePath)
minimumSpacing = clustering(4, numNodes, nodes, edges)
print("Minimum Spacing of 4 clusters: ", minimumSpacing)
if __name__ == "__main__":
main()