-
Notifications
You must be signed in to change notification settings - Fork 55
/
fcm_final.py
138 lines (112 loc) · 3.98 KB
/
fcm_final.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import pandas as pd
import numpy as np
import random
import operator
import math
# Load the data set. The last CSV column holds the class label; every other
# column is treated as a feature.
df_full = pd.read_csv("SPECTF_New.csv")
columns = list(df_full.columns)
features = columns[:-1]
class_labels = list(df_full[columns[-1]])
df = df_full[features]

# Number of Attributes. `df` already excludes the label column, so its
# column count is the attribute count (no further "- 1" adjustment).
num_attr = len(df.columns)

# Number of Clusters
k = 2

# Maximum number of iterations
MAX_ITER = 100

# Number of data points
n = len(df)

# Fuzzy parameter: exponent applied to membership values when weighting
# points, and the source of the 2 / (m - 1) power in the membership update.
m = 2.00
def accuracy(cluster_labels, class_labels):
    """Score a two-cluster labelling against 'Yes'/'No' class labels.

    Cluster ids carry no inherent meaning, so both possible assignments are
    scored. Index 0 of every returned list treats cluster 1 as 'Yes' and
    cluster 0 as 'No'; index 1 treats cluster 0 as 'Yes' and cluster 1 as
    'No'.

    Args:
        cluster_labels: Sequence of cluster ids (0 or 1), one per data point.
        class_labels: Sequence of 'Yes'/'No' strings aligned with
            ``cluster_labels``.

    Returns:
        A tuple ``(accuracy, precision, recall)``. Each element is a
        two-item list of percentages, one per mapping described above. A
        metric whose denominator is zero is reported as 0.0.
    """
    def percentage(numerator, denominator):
        # A collapsed clustering (every point in one cluster) or an empty
        # input leaves some denominators at zero; report 0.0 rather than
        # raising ZeroDivisionError.
        if denominator == 0:
            return 0.0
        return float(numerator) / denominator * 100

    # Confusion counts for the mapping "cluster 1 = Yes, cluster 0 = No".
    # Pairs holding any other value are ignored.
    tp = tn = fp = fn = 0
    for cluster, actual in zip(cluster_labels, class_labels):
        if cluster == 1 and actual == 'Yes':
            tp += 1
        elif cluster == 0 and actual == 'No':
            tn += 1
        elif cluster == 1 and actual == 'No':
            fp += 1
        elif cluster == 0 and actual == 'Yes':
            fn += 1

    # Swapping the cluster meaning swaps the roles of the four counts:
    # tp <-> fn and tn <-> fp, so no second pass over the data is needed.
    mappings = [(tp, tn, fp, fn), (fn, fp, tn, tp)]

    accuracies = []
    precisions = []
    recalls = []
    for hit, reject, false_alarm, miss in mappings:
        total = hit + reject + false_alarm + miss
        accuracies.append(percentage(hit + reject, total))
        precisions.append(percentage(hit, hit + false_alarm))
        recalls.append(percentage(hit, hit + miss))
    return accuracies, precisions, recalls
def initializeMembershipMatrix():
    """Build a random starting membership matrix.

    Returns:
        A list with ``n`` rows (one per data point). Each row is a list of
        ``k`` values drawn with ``random.random()`` and divided by their
        sum, so the row adds up to 1.
    """
    rows = []
    for _ in range(n):
        draws = [random.random() for _ in range(k)]
        total = sum(draws)
        # Normalise so the memberships of one point sum to 1.
        rows.append([draw / total for draw in draws])
    return rows
def calculateClusterCenter(membership_mat, data=None, fuzziness=None):
    """Compute each cluster centre as a membership-weighted mean of the data.

    For cluster ``j`` the centre is
    ``sum_i(u_ij ** fuzziness * x_i) / sum_i(u_ij ** fuzziness)``.

    Args:
        membership_mat: Row-per-point, column-per-cluster membership values.
        data: Optional row-per-point numeric data. Defaults to the
            module-level ``df``.
        fuzziness: Optional exponent applied to the memberships. Defaults
            to the module-level ``m``.

    Returns:
        A list with one entry per cluster; each entry is a list of floats
        giving that cluster's centre coordinates.
    """
    points = np.asarray(df.values if data is None else data, dtype=float)
    exponent = m if fuzziness is None else fuzziness

    # weights[i, j] = u_ij ** exponent
    weights = np.asarray(membership_mat, dtype=float) ** exponent
    # One normalising constant per cluster (column sums).
    totals = weights.sum(axis=0)
    # (clusters x points) . (points x features) -> (clusters x features)
    centers = weights.T.dot(points) / totals[:, np.newaxis]
    return centers.tolist()
def updateMembershipValue(membership_mat, cluster_centers, data=None, fuzziness=None):
    """Recompute every point's memberships from its distances to the centres.

    For point ``i`` and cluster ``j`` the new value is
    ``1 / sum_c((d_ij / d_ic) ** (2 / (fuzziness - 1)))`` where ``d`` is the
    Euclidean distance. When a point lies exactly on one or more centres
    that ratio is undefined, so the membership is split evenly among the
    coincident centres and set to 0 for the rest.

    Args:
        membership_mat: Row-per-point, column-per-cluster matrix. It is
            overwritten in place.
        cluster_centers: One coordinate sequence per cluster.
        data: Optional row-per-point numeric data. Defaults to the
            module-level ``df``.
        fuzziness: Optional fuzzy exponent. Defaults to the module-level
            ``m``.

    Returns:
        The same ``membership_mat`` object, after the update.
    """
    points = np.asarray(df.values if data is None else data, dtype=float)
    exponent = m if fuzziness is None else fuzziness
    power = 2.0 / (exponent - 1)
    centers = np.asarray(cluster_centers, dtype=float)

    for i, point in enumerate(points):
        # Euclidean distance from this point to every centre, as plain floats.
        distances = np.linalg.norm(centers - point, axis=1).tolist()
        coincident = [j for j, dist in enumerate(distances) if dist == 0]
        if coincident:
            # Avoid dividing by a zero distance: the point sits on a centre.
            share = 1.0 / len(coincident)
            new_row = [
                share if j in coincident else 0.0
                for j in range(len(distances))
            ]
        else:
            new_row = [
                1.0 / sum(math.pow(dist_j / dist_c, power) for dist_c in distances)
                for dist_j in distances
            ]
        for j, value in enumerate(new_row):
            membership_mat[i][j] = value
    return membership_mat
def getClusters(membership_mat):
    """Turn fuzzy memberships into hard cluster labels.

    Args:
        membership_mat: Row-per-point, column-per-cluster membership values.

    Returns:
        A list with one cluster index per row: the column holding the
        largest membership. When several columns tie for the maximum, the
        highest column index is chosen.
    """
    cluster_labels = []
    for memberships in membership_mat:
        # Comparing (value, index) pairs makes the larger index win a tie.
        _, best_index = max(
            (value, index) for index, value in enumerate(memberships)
        )
        cluster_labels.append(best_index)
    return cluster_labels
def fuzzyCMeansClustering():
    """Run fuzzy C-means for ``MAX_ITER`` iterations on the module data.

    Starts from a random membership matrix, then alternates between
    recomputing the cluster centres and recomputing the memberships. The
    final membership matrix is printed before returning.

    Returns:
        A tuple ``(cluster_labels, cluster_centers)``: the hard label of
        every data point taken from the final memberships, and the centres
        computed in the last iteration (empty if ``MAX_ITER`` is 0).
    """
    # Membership Matrix
    membership_mat = initializeMembershipMatrix()
    cluster_centers = []
    # Exactly MAX_ITER passes; the previous `curr <= MAX_ITER` test ran one
    # more pass than the configured maximum.
    for _ in range(MAX_ITER):
        cluster_centers = calculateClusterCenter(membership_mat)
        membership_mat = updateMembershipValue(membership_mat, cluster_centers)
    # Only the final memberships decide the labels, so derive them once.
    cluster_labels = getClusters(membership_mat)
    print(membership_mat)
    return cluster_labels, cluster_centers
# Cluster the data once, then report how well the two clusters line up with
# the known class labels (one value per cluster-to-class mapping).
labels, centers = fuzzyCMeansClustering()
a, p, r = accuracy(labels, class_labels)
for caption, scores in (("Accuracy = ", a), ("Precision = ", p), ("Recall = ", r)):
    print(caption + str(scores))