-
Notifications
You must be signed in to change notification settings - Fork 1
/
BFGS.py
46 lines (39 loc) · 1.51 KB
/
BFGS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from numpy import linalg as LA
from LineSearch import wolfe_step
import numpy as np
import copy
import time
def approximate_inverse_hessian(H_cur, x_cur, x_next, g_cur, g_next):
s = (x_next - x_cur).reshape((x_cur.shape[0], 1))
y = (g_next - g_cur).reshape((x_cur.shape[0], 1))
M1 = (s.T @ y + y.T @ H_cur @ y) * (s @ s.T) / ((s.T @ y) ** 2)
M2 = (H_cur @ y @ s.T + s @ y.T @ H_cur) / (s.T @ y)
H_next = H_cur + M1 - M2
return H_next
def bfgs(M, e, max_iter, *args):
M.total_time = time.time()
w_cur = M.w_0
func_cur, grad_cur = M.first_oracle(w_cur)
func_0, grad_0 = func_cur, copy.deepcopy(grad_cur)
H_cur = np.eye(w_cur.shape[0])
d = -H_cur @ grad_cur
s = wolfe_step(M, w_cur, d)
if s is None:
s = 1
w_next = w_cur + s * d
func_next, grad_next = M.first_oracle(w_next)
H_next = approximate_inverse_hessian(H_cur, w_cur, w_next, grad_cur, grad_next)
M.r_k.append(LA.norm(grad_0) ** 2)
while LA.norm(grad_cur) ** 2 / LA.norm(grad_0) ** 2 > e and M.iter_num < max_iter:
H_cur, w_cur, func_cur, grad_cur = H_next, w_next, func_next, grad_next
d = -H_cur @ grad_cur
s = wolfe_step(M, w_cur, d)
if s is None:
s = 1
w_next = w_cur + s * d
func_next, grad_next = M.first_oracle(w_next)
H_next = approximate_inverse_hessian(H_cur, w_cur, w_next, grad_cur, grad_next)
M.iter_num += 1
M.r_k.append(LA.norm(grad_cur) ** 2)
M.total_time = time.time() - M.total_time
return w_cur