support_vector_machine.py (forked from eriklindernoren/ML-From-Scratch)
from __future__ import division, print_function
import numpy as np
import cvxopt
from sklearn import datasets

# Import helper functions
from mlfromscratch.utils.data_manipulation import train_test_split, normalize
from mlfromscratch.utils.data_operation import accuracy_score
from mlfromscratch.utils.kernels import *
from mlfromscratch.utils import Plot

# Hide cvxopt output
cvxopt.solvers.options['show_progress'] = False


class SupportVectorMachine(object):
"""The Support Vector Machine classifier.
Uses cvxopt to solve the quadratic optimization problem.
Parameters:
-----------
    C: float
        Penalty term of the soft-margin objective. If C is None (or 0),
        the hard-margin formulation is used.
kernel: function
Kernel function. Can be either polynomial, rbf or linear.
power: int
The degree of the polynomial kernel. Will be ignored by the other
kernel functions.
    gamma: float
        Used in the rbf kernel function. Defaults to 1 / n_features.
coef: float
Bias term used in the polynomial kernel function.
"""

    def __init__(self, C=1, kernel=rbf_kernel, power=4, gamma=None, coef=4):
self.C = C
self.kernel = kernel
self.power = power
self.gamma = gamma
self.coef = coef
self.lagr_multipliers = None
self.support_vectors = None
self.support_vector_labels = None
self.intercept = None

    def fit(self, X, y):
n_samples, n_features = np.shape(X)
# Set gamma to 1/n_features by default
if not self.gamma:
self.gamma = 1 / n_features
# Initialize kernel method with parameters
self.kernel = self.kernel(
power=self.power,
gamma=self.gamma,
coef=self.coef)
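        # self.kernel is now a closure k(x1, x2) that returns a scalar
        # similarity between two samples. Note that this overwrites the
        # kernel constructor, so fit() can only be called once per instance.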
# Calculate kernel matrix
kernel_matrix = np.zeros((n_samples, n_samples))
for i in range(n_samples):
for j in range(n_samples):
kernel_matrix[i, j] = self.kernel(X[i], X[j])
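        # The dual form of the SVM problem is to maximize
        #   sum_i a_i - 1/2 * sum_i sum_j a_i * a_j * y_i * y_j * K(x_i, x_j)
        # subject to a_i >= 0 (and a_i <= C for the soft margin) and
        # sum_i a_i * y_i = 0. cvxopt.solvers.qp minimizes
        #   1/2 * x^T P x + q^T x  subject to  Gx <= h, Ax = b,
        # so the objective is negated: P[i, j] = y_i * y_j * K(x_i, x_j)
        # and q is a vector of -1s.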
# Define the quadratic optimization problem
P = cvxopt.matrix(np.outer(y, y) * kernel_matrix, tc='d')
q = cvxopt.matrix(np.ones(n_samples) * -1)
A = cvxopt.matrix(y, (1, n_samples), tc='d')
b = cvxopt.matrix(0, tc='d')
if not self.C:
G = cvxopt.matrix(np.identity(n_samples) * -1)
h = cvxopt.matrix(np.zeros(n_samples))
else:
G_max = np.identity(n_samples) * -1
G_min = np.identity(n_samples)
G = cvxopt.matrix(np.vstack((G_max, G_min)))
h_max = cvxopt.matrix(np.zeros(n_samples))
h_min = cvxopt.matrix(np.ones(n_samples) * self.C)
h = cvxopt.matrix(np.vstack((h_max, h_min)))
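            # Stacked together, G and h encode the box constraint
            # 0 <= a_i <= C of the soft-margin formulation.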
# Solve the quadratic optimization problem using cvxopt
minimization = cvxopt.solvers.qp(P, q, G, h, A, b)
# Lagrange multipliers
lagr_mult = np.ravel(minimization['x'])
# Extract support vectors
        # Get indices of non-zero Lagrange multipliers
idx = lagr_mult > 1e-7
# Get the corresponding lagr. multipliers
self.lagr_multipliers = lagr_mult[idx]
# Get the samples that will act as support vectors
self.support_vectors = X[idx]
# Get the corresponding labels
self.support_vector_labels = y[idx]
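        # For a support vector x_k the margin constraint is active:
        #   y_k * (sum_i a_i * y_i * K(x_i, x_k) + b) = 1
        # and since y_k is -1 or +1, b = y_k - sum_i a_i * y_i * K(x_i, x_k).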
# Calculate intercept with first support vector
self.intercept = self.support_vector_labels[0]
for i in range(len(self.lagr_multipliers)):
            self.intercept -= (self.lagr_multipliers[i] *
                               self.support_vector_labels[i] *
                               self.kernel(self.support_vectors[i],
                                           self.support_vectors[0]))

    def predict(self, X):
y_pred = []
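        # The decision function is f(x) = sum_i a_i * y_i * K(x_i, x) + b;
        # the predicted class is sign(f(x)).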
# Iterate through list of samples and make predictions
for sample in X:
prediction = 0
# Determine the label of the sample by the support vectors
for i in range(len(self.lagr_multipliers)):
                prediction += (self.lagr_multipliers[i] *
                               self.support_vector_labels[i] *
                               self.kernel(self.support_vectors[i], sample))
prediction += self.intercept
y_pred.append(np.sign(prediction))
return np.array(y_pred)


def main():
data = datasets.load_iris()
X = normalize(data.data[data.target != 0])
y = data.target[data.target != 0]
y[y == 1] = -1
y[y == 2] = 1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
# Reduce dimension to two using PCA and plot the results
Plot().plot_in_2d(X_test, y_pred, title="Support Vector Machine", accuracy=accuracy)


if __name__ == "__main__":
main()
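
# When run as a script this trains a polynomial-kernel SVM on two of the
# three iris classes (class 0 is dropped and the remaining labels are
# remapped to -1 and +1) and prints the test accuracy; the exact value
# depends on the random train/test split.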