gradientdescent.jl
using LinearAlgebra
using Zygote
# Single gradient-descent step; the gradient is computed with Zygote.
# gradient(loss_f, current)[1] extracts the gradient vector, so this
# works for inputs of any dimension.
function gd_step(
    current,
    loss_f,
    alpha
)
    return current .- alpha .* gradient(loss_f, current)[1]
end
# Single gradient-descent step with a user-supplied gradient function.
# loss_f is accepted but unused here; it keeps the two gd_step methods
# interchangeable by arity.
function gd_step(
    current,
    loss_f,
    grad_f,
    alpha
)
    return current .- alpha .* grad_f(current)
end
# Run plain gradient descent for N_iterations steps, computing gradients
# with Zygote via gd_step.
function run_gd(
    initial,
    loss_fn,
    alpha,
    N_iterations
)
    conv = zeros(N_iterations + 1)  # keep track of convergence
    conv[1] = loss_fn(initial)
    current = initial
    for i in 1:N_iterations
        current = gd_step(current, loss_fn, alpha)
        conv[i + 1] = loss_fn(current)  # save the loss value at the current point
    end
    return (current, conv)
end
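# Usage sketch (illustrative assumptions, not part of this file's API):
# minimize a toy quadratic with the Zygote-based run_gd. quad_loss, the
# step size 0.1, and the 100 iterations are made-up example values.
#
#   quad_loss(x) = sum((x .- [1.0, -2.0]) .^ 2)
#   x_min, conv = run_gd([0.0, 0.0], quad_loss, 0.1, 100)  # x_min ≈ [1.0, -2.0]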
# Run plain gradient descent with a user-supplied gradient function grad_fn.
function run_gd(
    initial,
    loss_fn,
    alpha,
    N_iterations,
    grad_fn
)
    conv = zeros(N_iterations + 1)  # keep track of convergence
    conv[1] = loss_fn(initial)
    current = initial
    for i in 1:N_iterations
        current = gd_step(current, loss_fn, grad_fn, alpha)
        conv[i + 1] = loss_fn(current)  # save the loss value at the current point
    end
    return (current, conv)
end
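# Usage sketch (illustrative): the same toy quadratic, but with its analytic
# gradient supplied explicitly to skip the automatic differentiation.
# quad_grad is a made-up helper matching quad_loss above.
#
#   quad_grad(x) = 2 .* (x .- [1.0, -2.0])
#   x_min, conv = run_gd([0.0, 0.0], quad_loss, 0.1, 100, quad_grad)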
# Single gradient-descent step with heavy-ball momentum; the gradient is
# computed with Zygote. Returns the new point and the updated velocity.
function gd_step_momentum(
    current,
    loss_f,
    alpha,
    beta,
    v
)
    v_new = beta .* v .- alpha .* gradient(loss_f, current)[1]
    return current .+ v_new, v_new
end
# Run gradient descent with momentum for N_iterations steps.
function run_gd_momentum(
    initial,
    loss_fn,
    alpha,
    beta,
    N_iterations
)
    conv = zeros(N_iterations + 1)  # keep track of convergence
    conv[1] = loss_fn(initial)
    current = initial
    v = zeros(size(initial))  # velocity starts at zero
    for i in 1:N_iterations
        current, v = gd_step_momentum(current, loss_fn, alpha, beta, v)
        conv[i + 1] = loss_fn(current)  # save the loss value at the current point
    end
    return (current, conv)
end
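# Usage sketch (illustrative): heavy-ball momentum on the same toy quadratic;
# beta = 0.9 is only an example value, not a tuned setting.
#
#   x_min, conv = run_gd_momentum([0.0, 0.0], quad_loss, 0.1, 0.9, 100)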
# Single Nesterov accelerated gradient step; the gradient is evaluated at
# the look-ahead point current .+ beta .* v, computed with Zygote.
function gd_step_nesterov(
    current,
    loss_f,
    alpha,
    beta,
    v
)
    v_new = beta .* v .- alpha .* gradient(loss_f, current .+ beta .* v)[1]
    return current .+ v_new, v_new
end
# Run Nesterov accelerated gradient descent for N_iterations steps.
function run_gd_nesterov(
    initial,
    loss_fn,
    alpha,
    beta,
    N_iterations
)
    conv = zeros(N_iterations + 1)  # keep track of convergence
    conv[1] = loss_fn(initial)
    current = initial
    v = zeros(size(initial))  # velocity starts at zero
    for i in 1:N_iterations
        current, v = gd_step_nesterov(current, loss_fn, alpha, beta, v)
        conv[i + 1] = loss_fn(current)  # save the loss value at the current point
    end
    return (current, conv)
end
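# Usage sketch (illustrative): Nesterov acceleration with the same made-up
# hyperparameters as the momentum example above.
#
#   x_min, conv = run_gd_nesterov([0.0, 0.0], quad_loss, 0.1, 0.9, 100)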
# Same as run_gd_nesterov, but also records every iterate (including the
# final one) so the optimization path can be inspected or plotted.
function run_gd_nesterov_tracked(
    initial,
    loss_fn,
    alpha,
    beta,
    N_iterations
)
    historical = zeros(N_iterations + 1, length(initial))  # one row per iterate
    conv = zeros(N_iterations + 1)  # keep track of convergence
    conv[1] = loss_fn(initial)
    current = initial
    v = zeros(size(initial))  # velocity starts at zero
    for i in 1:N_iterations
        historical[i, :] = current
        current, v = gd_step_nesterov(current, loss_fn, alpha, beta, v)
        conv[i + 1] = loss_fn(current)  # save the loss value at the current point
    end
    historical[end, :] = current  # record the final iterate as well
    return (historical, conv)
end
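# Minimal runnable demo (an illustrative addition; demo_loss and all
# hyperparameters below are assumptions): runs the tracked Nesterov variant
# on a toy quadratic when the file is executed directly as a script.
if abspath(PROGRAM_FILE) == @__FILE__
    demo_loss(x) = sum((x .- [1.0, -2.0]) .^ 2)
    historical, conv = run_gd_nesterov_tracked([0.0, 0.0], demo_loss, 0.1, 0.9, 50)
    println("final iterate: ", historical[end, :])
    println("final loss:    ", conv[end])
end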