linear_regression.py
from sklearn import datasets, linear_model
import matplotlib.pyplot as plt
import numpy as np


def load_data():
    # load_boston ships with scikit-learn < 1.2 (it was removed in 1.2);
    # the dataset has 506 samples and 13 features.
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    features = boston.feature_names
    return X, y, features
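

# Optional sketch for scikit-learn >= 1.2, where load_boston is gone: the raw
# data can be pulled from the original CMU source (requires pandas; the URL and
# parsing follow scikit-learn's own removal notice). Not called by main().
def load_data_from_cmu():
    import pandas as pd
    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    # Each record spans two lines in the raw file, hence the interleaved slicing.
    raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
    X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    y = raw_df.values[1::2, 2]
    features = np.array(["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
                         "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT"])
    return X, y, features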


def visualize(X, y, features):
    plt.figure(figsize=(20, 5))
    feature_count = X.shape[1]
    # One scatter panel per feature, plotted against the target.
    for i in range(feature_count):
        splot = plt.subplot(3, 5, i + 1)
        splot.plot(X[:, i], y, '.')
        splot.set_title(features[i])
    plt.tight_layout()
    plt.show()


def predict(X, w):
    # Prepend a column of ones so that w[0] acts as the intercept term.
    ones = np.ones((len(X), 1))
    X1 = np.hstack((ones, X))
    return X1.dot(w)


def fit_regression(X, Y):
    # Add the bias column, then solve the normal equations a @ w = b,
    # where a = X^T X and b = X^T Y.
    ones = np.ones((len(X), 1))
    X = np.hstack((ones, X))
    a = np.dot(X.transpose(), X)
    b = np.dot(X.transpose(), Y)
    w = np.linalg.solve(a, b)
    return w
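

# Optional sketch: np.linalg.lstsq solves the same least-squares problem without
# explicitly forming X^T X, which is better conditioned numerically.
# fit_regression_lstsq is a hypothetical helper and is not called by main().
def fit_regression_lstsq(X, Y):
    ones = np.ones((len(X), 1))
    X1 = np.hstack((ones, X))
    # lstsq returns (solution, residuals, rank, singular values); keep the solution.
    w, _, _, _ = np.linalg.lstsq(X1, Y, rcond=None)
    return w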


def graph(y, y_p):
    plt.figure(figsize=(15, 8))
    splot = plt.subplot(1, 1, 1)
    # Actual targets in the default color, predictions in red.
    splot.scatter(range(len(y)), y, marker='.')
    splot.scatter(range(len(y)), y_p, marker='.', color='red')
    plt.tight_layout()
    plt.show()
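

# Optional sketch: the fitted weights can be cross-checked against scikit-learn's
# LinearRegression (the linear_model import at the top). check_against_sklearn is
# a hypothetical helper and is not called by main().
def check_against_sklearn(X, y, w):
    reg = linear_model.LinearRegression(fit_intercept=True).fit(X, y)
    # w[0] is the intercept, w[1:] are the feature weights.
    print("intercept diff:", abs(reg.intercept_ - w[0]))
    print("max weight diff:", np.max(np.abs(reg.coef_ - w[1:])))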


def main():
    # Load the data
    X, y, features = load_data()
    print("Features: {}".format(features))
    # Visualize the features
    visualize(X, y, features)
    # Split the data into train and test sets (80/20) via a random permutation
    idx = np.random.permutation(len(X))
    test_len = len(X) // 5
    trn_len = len(X) - test_len
    trn_X = X[idx[:trn_len]]
    trn_y = y[idx[:trn_len]]
    test_X = X[idx[trn_len:]]
    test_y = y[idx[trn_len:]]
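    # A similar 80/20 split could also be done with scikit-learn (commented sketch):
    #   from sklearn.model_selection import train_test_split
    #   trn_X, test_X, trn_y, test_y = train_test_split(X, y, test_size=0.2)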
    # Fit the regression model on the training split
    w = fit_regression(trn_X, trn_y)
    # Evaluate on the held-out test split
    pred = predict(test_X, w)
    mse = np.mean((pred - test_y) ** 2)
    print("mean squared error: ", mse)
    print("root mean squared error: ", np.sqrt(mse))
    print("sum of squared errors: ", np.sum((pred - test_y) ** 2))
    # graph(test_y, pred)


if __name__ == "__main__":
    main()