-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtrack_reddit.jl
204 lines (188 loc) · 7.08 KB
/
track_reddit.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
using ArgParse
using Arpack
using CSV, DataFrames
using LightGraphs
using LinearMaps
using LinearAlgebra, Random, Statistics
using SparseArrays
include("EvolvingNetworks.jl")
include("Utils.jl")
"""
update_eigvals(Anew, Vnew, λs, γ, ϵ, qPred, r, skip_last) -> (sSize, λAll, Vold, qIter)
Perform phase 2 of the eigenvalue estimation algorithm, applying randomized
subspace or block krylov (if `use_bk == true`) on the deflated matrix
`(I - Vnew * Vnew') * Anew * (I - Vnew * Vnew')`, given the dominant eigenvalues
`λs`, the assumed decay factor `γ`, a target accuracy `ϵ`, the number of higher
order eigenvalues `r` and the number `skip_last` of eigenvalue ratios to avoid
considering.
"""
function update_eigvals(Anew, Vnew, Vold, λs, γ, ϵ, nHi, skip_last)
n = size(Anew)[1]; ℓ = nHi + 5 # oversampling factor
Vold[:, 1:nHi], λHi, qIter = Utils.bkvals(Anew, nHi, Vold, ϵ,
getVecs=true, Vp=Vnew)
λAll = abs.(vcat(λs, λHi))
conn_comp = sum(abs.(λAll .- maximum(λAll)) .<= ϵ)
sSize, λAll = Utils.getMinRatio(abs.(vcat(λs, λHi)), conn_comp + skip_last)
return sSize, λAll, Vold, qIter
end
# opnorm of a LinearMap
lmap_opnorm(Lmap) = first(svds(Lmap, ritzvec=false, tol=1e-6, nsv=1)[1].S)
"""
resize_subspace(A, V0, VHi, sSize)
Appropriately resize the subspace `V0` when `sSize != size(V0)[2]`, using the
leading `sSize` Ritz vectors of the larger subspace `[V0, VHi]` associated to
`A`.
"""
function resize_subspace(A, V0, VHi, sSize)
sDiff = sSize - size(V0)[2]
if (sDiff > 0)
# add excess vectors from
V0 = hcat(V0, VHi[:, 1:sDiff])
else
if (sDiff < 0) # keep leading `sSize` vectors
V0, _ = Utils.rayleigh_ritz(A, hcat(V0, VHi), sSize)
end
end
return V0
end
"""
update_subspace(A, V0, use_bk, sSize, ϵ)
Run the LOBPCG or subspace iteration methods, depending on whether `use_bk` has
been set, on the matrix `A` with starting guess `V0` to find an `ϵ`-accurate
subspace of size `sSize`.
"""
function update_subspace(A, V0, use_bk, sSize, ϵ)
n = size(A)[1]
if use_bk
return @timed Utils.block_krylov(A, sSize, copy(V0), ϵ, maxiter=n)
else
return @timed Utils.subspace_iteration(A, sSize, n, ϵ, V0=copy(V0))
end
end
# Track the invariant subspace
# ============================
function track_subspace(gSrc, gTgt, eAdd; monitor_freq::Int, ratio::Float64,
γ::Float64, δ::Float64, ϵ::Float64, use_bk::Bool)
@info("Network init - [OK]. Remaining edges: $(length(eAdd)) - |E|: $(gTgt.ne)")
gComp = connected_components(gTgt)[1]
Anorm = EvolvingNetworks.reg_adjacency(gSrc)
# preallocate E matrix
n = size(Anorm)[1]
Emat = SparseMatrixCSC{Float64, Int64}(sparse([], [], [], n, n))
# initial dimensions and evals / evecs
max_dim = 120; nHi = 5; V0 = Utils._qrDirect(randn(n, max_dim))
V0, D, _ = Utils.block_krylov(Anorm, max_dim, V0, ϵ, maxiter=n)
# get number of connected components
conn_comp = sum(abs.(D .- maximum(D)) .<= ϵ)
sSize, _ = Utils.getMinRatio(abs.(D), conn_comp + 1)
λ1, λr, λr₊ = D[[1, sSize, sSize+1]]
# isolate initial eigenspace and higher order eigenspace
V0 = V0[:, 1:sSize]; @info("λs: $(λr), $(λr₊)")
G = randn(n, nHi); VHi = Utils._qrDirect(G - V0 * (V0' * G))
# stat lists
nImp = []; Δs = []; Δother = []; steps = []; stepsHi = []
bkPred = []; siPred = []; core = []; high = []
# number of added edges
eAdded = 0
for j = 1:length(eAdd)
# add edge to graph
(ei, ej) = pop!(eAdd); has_edge(gSrc, ei, ej) && continue
add_edge!(gSrc, ei, ej); eAdded += 1
if (eAdded % monitor_freq) == 0
Anew = EvolvingNetworks.reg_adjacency(gSrc)
E = LinearMap(X -> Anew * X - Anorm * X, n, n, issymmetric=true)
Enorm = lmap_opnorm(E); Anorm = Anew
# update subspace if new size is different
V0 = resize_subspace(Anew, V0, VHi, sSize)
Δ = Utils.davKahProxy(V0, E, Enorm, ϵ, λ1, λr, λr₊)
(Δ == 0) && continue
@info("Repeat: $(eAdded) - |E|_2 = $(Enorm) - Δ: $(Δ)")
push!(Δs, Δ) # update stat
# prediction using a-priori bound, if applicable
nPredBK, nPredSI, eNorm_r, eNorm_r₊ =
Utils.getBounds(Δ, E, λr, λr₊, ϵ, γ, n, V0, VHi, sSize)
push!(bkPred, nPredBK); push!(siPred, nPredSI) # update stats
# update subspace
(Vnew, λs, runIt), coreTime, _ =
update_subspace(Anew, V0, use_bk, sSize, ϵ)
push!(steps, runIt)
# update high order eigenvalues for next iteration
(sSize, λAll, VHi, qIter), hiTime, _ =
@timed update_eigvals(Anew, Vnew, VHi, λs, γ, ϵ, nHi, 1)
# update subspace size and new λ1, λr, λr₊, V0
λ1, λr, λr₊ = λAll[[1, sSize, sSize+1]]; V0 = copy(Vnew)
# update statistics
append!(nImp, sSize); append!(stepsHi, qIter)
append!(core, coreTime); append!(high, hiTime)
# update linear map
Anorm = Anew
end
end
# Return all stats
return steps, siPred, bkPred, stepsHi, nImp, Δs, core, high
end
s = ArgParseSettings(description="""
Track the leading eigenspace of the temporal-reddit-reply dataset using
the proposed adaptive method.""")
@add_arg_table s begin
"--filepath"
help = "Path to file containing data"
arg_type = String
default = "datasets/temporal-reddit-reply.txt"
"--monitor_freq"
help = "The monitoring frequency"
arg_type = Int
default = 10
"--ratio"
help = "The ratio of edges in the starting graph and the static version"
arg_type = Float64
default = 0.9
"--seed"
help = "The random seed for the RNG"
arg_type = Int
default = 999
"--gamma"
help = "The eigenvalue decay parameter γ"
arg_type = Float64
default = 0.1
"--delta_fail"
help = "The probability of failure for randomized iteration"
arg_type = Float64
default = 1e-4
"--eps"
help = "The desired subspace distance to retain"
arg_type = Float64
default = 1e-3
"--record_true"
help = "Set to record true subspace distance at each iter"
action = :store_true
"--max_nodes"
help = "Maximum number of vertices to keep"
arg_type = Int
default = 100000
"--use_bk"
help = "Set to use a block Krylov method instead of subspace iteration"
action = :store_true
end
parsed = parse_args(s); Random.seed!(parsed["seed"])
monitor_freq, ratio = parsed["monitor_freq"], parsed["ratio"]
γ, δ_fail, ϵ = parsed["gamma"], parsed["delta_fail"], parsed["eps"]
use_bk, record_true = parsed["use_bk"], parsed["record_true"]
max_nodes = parsed["max_nodes"]
filepath = parsed["filepath"]
# get the temporal-reddit-reply graph
gSrc, gTgt, eAdd =
EvolvingNetworks.genRedditReplyGraphs(filepath, max_nodes, ratio)
# number of nodes
nTotal = size(adjacency_matrix(gTgt))[1]
# retrieve statistics of interest
steps, sPredSI, sPredBK, stepsHi, nImp, Δs, core, high =
track_subspace(
gSrc, gTgt, eAdd, monitor_freq=monitor_freq, ratio=ratio, γ=γ,
δ=δ_fail, ϵ=ϵ, use_bk=use_bk)
df = DataFrame(k=(1:length(steps)) .* monitor_freq,
steps=steps, stepsHi=stepsHi, bkPred=sPredBK,
siPred=sPredSI, nImp=nImp, Ds=Δs,
coreTime=core, highTime=high)
# write data to output
CSV.write("track_reddit_max_nodes-$(max_nodes)_freq-$(monitor_freq)_bk_$(use_bk).csv", df)