"""Gradient descent from scratch with backtracking line search"""
from typing import Callable
import numpy as np
from regression import load_data, least_squares_regression, plot

def backtracking_line_search(
    f: Callable[[np.ndarray, np.ndarray, np.ndarray], np.ndarray],
    X: np.ndarray,
    Y: np.ndarray,
    w: np.ndarray,
    delta_x: np.ndarray,
    a: float = 0.3,  # Sufficient-decrease fraction, between 0 and 1
    b: float = 0.8,  # Step shrink factor, between 0 and 1
) -> float:
    # t is the step length; start at 1 and shrink it until the
    # sufficient-decrease (Armijo) condition holds
    t = 1.0
    while f(X, Y, w + t * delta_x) > f(X, Y, w) + (
        a * t * grad_least_squares(X, Y, w).T @ delta_x
    ):
        t *= b
    return t
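
# The loop above accepts the first t satisfying the Armijo condition
#   f(w + t * delta_x) <= f(w) + a * t * grad_f(w)^T delta_x.
# Tiny worked example (illustrative values, not from the dataset): with
# X = [[1.], [2.]], Y = [1., 2.] and w = [[0.]], the gradient is
# 2 * X.T @ (X @ w - Y[:, None]) = [[-10.]], so delta_x = [[10.]] is a
# descent direction and t shrinks by factors of b until the condition holds.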

def gradient_descent(
    f: Callable[[np.ndarray, np.ndarray, np.ndarray], np.ndarray],
    X: np.ndarray,
    Y: np.ndarray,
) -> np.ndarray:
    # Start at an arbitrary point w shaped to match X's column dimension
    w = np.array(np.reshape(X[0], (X.shape[1], 1)), dtype="float64")
    # Negate the gradient to get a descent direction
    delta_x = -grad_least_squares(X, Y, w)
    # Iterate while the gradient norm exceeds the convergence threshold
    while np.linalg.norm(delta_x) > 1e-6:
        # Backtracking line search picks the step length
        step_rate = backtracking_line_search(f, X, Y, w, delta_x)
        # Take the step
        w += step_rate * delta_x
        # Recompute the descent direction at the new point
        delta_x = -grad_least_squares(X, Y, w)
    return w
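
# Usage sketch (hypothetical data, assuming a design matrix whose last
# column is the intercept): the points below lie exactly on y = 2x, so the
# solver should approach slope 2 and intercept 0.
#   X = np.array([[1., 1.], [2., 1.], [3., 1.]])
#   Y = np.array([2., 4., 6.])
#   gradient_descent(loss, X, Y)  # -> approximately [[2.], [0.]]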

# RSS loss function: (y - Xb)^T (y - Xb)
def loss(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
    Xb = x @ b
    y_minus_xb = y[:, None] - Xb
    return y_minus_xb.T @ y_minus_xb
    # equivalent scalar form: np.sum((y[:, None] - x @ b) ** 2)
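
# Worked example (illustrative): x = [[1.], [2.]], y = [1., 2.] and
# b = [[1.]] give zero residuals and loss [[0.]]; with b = [[0.]] the
# loss is 1**2 + 2**2 = [[5.]].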

# Gradient of the RSS loss function
def grad_least_squares(x: np.ndarray, y: np.ndarray, b: np.ndarray) -> np.ndarray:
    return 2 * x.T @ (x @ b - y[:, None])
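
# Derivation: expanding RSS(b) = (y - Xb)^T (y - Xb) and differentiating
# with respect to b gives -2 X^T (y - Xb) = 2 X^T (Xb - y).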

# Closed-form normal-equation solution for comparing results
def normal_equation(X: np.ndarray, y: np.ndarray) -> np.ndarray:
    first = np.linalg.pinv(X.T @ X)
    return first @ X.T @ y
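
# Setting the RSS gradient to zero yields the normal equations
# X^T X b = X^T y; pinv is used so a rank-deficient X^T X is still handled.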

if __name__ == "__main__":
    # Load the dataset and append a column of ones for the intercept term
    x, y = load_data("dataset4.npy")
    x_extended = np.column_stack((x, np.ones(len(x))))
    # Descend to the global minimum (RSS is convex, so any minimum is global)
    d = gradient_descent(loss, x_extended, y).flatten()
    # Compute the residual sum of squares of the fit
    pred = x_extended @ d
    e = y - pred
    rss = np.sum(e ** 2)
    # Solve the normal equation for comparison
    normal_solution = normal_equation(x_extended, y)
    # Least squares solution to compare the actual coefficients
    w = least_squares_regression(x, y)
    print(
        f"Correct coefficients: {w}\nGradient solution: {d}\nNormal equation: {normal_solution}\nRSS: {rss}"
    )
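    # Cross-check with numpy's built-in least-squares solver (added here as
    # a sanity sketch; its coefficients should match the solutions above)
    ref, *_ = np.linalg.lstsq(x_extended, y, rcond=None)
    print(f"np.linalg.lstsq solution: {ref}")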
    plot(x, y, d)