-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathens_snapshot_tools.py.orig
354 lines (257 loc) · 11.1 KB
/
ens_snapshot_tools.py.orig
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
# ens_snapshot_tools.py
# D Amrhein, November 2020
import numpy as np
def mk_seofs_ts(data,time,neofs,binsize):
"""
Function to compute "snapshot" projections of patterns onto ensemble
model output
INPUTS
data Modal data matrix with dimensions (time,space,nens)
time Time axis of model output
patts Matrix of spatial patterns (space along rows) to project
binsize Odd (symmetric) length of bin in time in which to compute
SEOFs
OUTPUTS
EOF_ts Time series (indexed by space, EOF index, time) of leading
EOFs
SV_ts Time series (indexed by EOF index, time) of leading
singular values
t Time corresponding to EOF_ts and SV_ts
We throw away padding at the beginning and end so that all
computations use the same number of times.
"""
<<<<<<< HEAD
pad = int(np.floor(binsize/2))
[td,sd,_] = data.shape
# Define a vector of time indices not in padded regions
tis = np.arange(pad,td-pad)
ltis = len(tis)
t = np.empty(ltis)
EOF_ts = np.empty([ltis,sd,neofs])
SV_ts = np.empty([ltis,neofs])
for ii,ti in enumerate(tis):
=======
pad = np.floor(binsize/2) #problem here w indent/unindent
[td,_,_] = data.shape
t = []
EOF_ts = []
SV_ts = []
# Define a vector of time indices not in padded regions
tis = np.arange(pad,td-pad)
for ii in np.arange(tis):
ti = tis[ii]
>>>>>>> 15e860c81127eb30a42a26aff5e3663222296fa4
# Define a time range
tr = np.arange(ti-pad,ti+pad+1)
# Select time window
dat = data[tr,:,:]
# Compute weights and save
sU,sS = mk_seofs(dat,neofs)
<<<<<<< HEAD
EOF_ts[ii,:,:] = sU
#import pdb
#pdb.set_trace()
SV_ts[ii,:] = sS
t[ii] = time[ti]
=======
EOF_ts[:,:,ii] = sU
SV_ts[:,ii] = sS
t[ii] = time(ti)
>>>>>>> 15e860c81127eb30a42a26aff5e3663222296fa4
return EOF_ts, SV_ts, t
def mk_seofs(dat,neofs):
<<<<<<< HEAD
"""
Function to compute "snapshot" EOFs from ensemble model output
INPUTS
data Modal data matrix with dimensions (time,space,nens)
neofs Number of snapshot EOFs to output as a function of time.
OUTPUTS
sU Left singular vectors (EOFs) computed over ensemble number
and time for this particular time bin
sS Same as sU, but a vector of singular values
"""
=======
"""
Function to compute "snapshot" EOFs from ensemble model output
INPUTS
data Modal data matrix with dimensions (time,space,nens)
neofs Number of snapshot EOFs to output as a function of time.
OUTPUTS
sU Left singular vectors (EOFs) computed over ensemble number
and time for this particular time bin
sS Same as sU, but a vector of singular values
"""
>>>>>>> 15e860c81127eb30a42a26aff5e3663222296fa4
# Change dimensions of dat from (time, space, nens) to (space,time,nens) to allow for reshaping
dats = np.transpose(dat,(1,0,2))
[sd,td,nd] = dats.shape
# Reshape so that the second axis is a combination of time and ensemble dimensions.
# Default is 'C' indexing which should leave the time dimension intact.
datr = dats.reshape((sd,td*nd))
# Remove the mean over time and ensembles
datnm = datr - datr.mean(axis=1, keepdims=True)
# Compute SVD
[sUa,sSa,_] = np.linalg.svd(datnm,full_matrices=False)
# Return desired number of singular values and vectors
sU = sUa[:,:neofs]
sS = sSa[:neofs]
return sU, sS
def mk_sproj_ts(data,time,patts,binsize):
"""
Function to compute "snapshot" projections of patterns onto ensemble
model output
INPUTS
data Modal data matrix with dimensions (time,space,nens)
time Time axis of model output
patts Matrix of spatial patterns (space in columns) to project
binsize Odd (symmetric) length of bin in time in which to compute
SEOFs
OUTPUTS
wts_ts Time series (indexed by column) of weights corresponding to
different patterns (indexed by row)
t Time for weight time series
We throw away padding at the beginning and end so that all
computations use the same number of times.
"""
pad = int(np.floor(binsize/2))
[td,sd,_] = data.shape
[_,pd] = patts.shape
# Define a vector of time indices not in padded regions
tis = np.arange(pad,td-pad)
ltis = len(tis)
<<<<<<< HEAD
t = np.empty(ltis)
wts_ts = np.empty([ltis,pd])
for ii,ti in enumerate(tis):
=======
for ii in np.arange(tis):
ti = tis[ii]
>>>>>>> 15e860c81127eb30a42a26aff5e3663222296fa4
# Define a time range
tr = np.arange(ti-pad,ti+pad+1)
# Select time window
dat = data[tr,:,:]
# Compute weights and save
<<<<<<< HEAD
wts_ts[ii,:] = mk_sproj(dat,patts)
=======
wts_ts[][:,ii] = mk_sproj(dat,patts)
>>>>>>> 15e860c81127eb30a42a26aff5e3663222296fa4
t[ii] = time[ti]
return wts_ts, t
def mk_sproj(dat,patts):
"""
Function to compute "snapshot" projections of patterns onto ensemble
model output
INPUTS
data Modal data matrix with dimensions (time,space,nens)
patts Matrix of spatial patterns (space along rows) to project
OUTPUTS
wts Vector of SVD-like weights computed for each pattern by
summing projections of ensemble number and time
Goal is to get a weighting like an EOF. So the procedure is to
project each pattern onto the data, ending up with a vector (indexed
by time and nens). Then normalize that vector so that v'v*v = 1 (like
a PC). The normalization is the weight.
"""
# Change dimensions of dat from (time, space, nens) to (space,time,nens) to allow for reshaping
dats = np.transpose(dat,(1,0,2))
[sd,td,nd] = dats.shape
# Reshape so that the second axis is a combination of time and ensemble dimensions.
# Default is 'C' indexing which should leave the time dimension intact.
datr = dats.reshape((sd,td*nd))
# Remove the mean over time and ensembles
datnm = datr - datr.mean(axis=1, keepdims=True)
# Compute projection of each pattern onto all ensemble members
proj = datnm.T.dot(patts);
# Compute how much of the variance across ensemble members is accounted for by the pattern
wts = np.sqrt(np.sum(proj*proj,0));
return wts
def mk_covs(data,time,binsize):
"""
Constructs a time series of covariance matrices from a data set with time, space, and ensemble dimensions.
In practice, "data" must have a small spatial dimension because we are explicitly storing covariance matrices.
As such, for large model output it is recommended that data be first projected into a reduced basis.
INPUTS
data Modal data matrix with dimensions (space,time,nens)
time Time axis of model output
binsize Odd (symmetric) length of bin in time in which to compute
SEOFs
OUTPUTS
C 3d matrix of time-evolving spatial covariances computed over ensemble space and, for binsize>1, a moving time window
t Time for covariance matrix time series
"""
pad = int(np.floor(binsize/2))
[td,sd,_] = data.shape
# Define a vector of time indices not in padded regions. These will be our bin centers.
tis = np.arange(pad,td-pad)
ltis = len(tis)
t = np.empty(ltis)
C = np.empty([ltis,sd,sd])
for ii,ti in enumerate(tis):
# Define a time range
tr = np.arange(ti-pad,ti+pad+1)
# Select time window
dat = data[tr,:,:]
# Change dimensions of dat from (time, space, nens) to (space,time,nens) to allow for reshaping
dats = np.transpose(dat,(1,0,2))
[sd,td,nd] = dats.shape
# Reshape so that the second axis is a combination of time and ensemble dimensions.
# Default is 'C' indexing which should leave the time dimension intact.
datr = dats.reshape((sd,td*nd))
# Remove the mean over time and ensembles
datnm = datr - datr.mean(axis=1, keepdims=True)
# Save the covariance matrix. Warning -- only do this with reduced space!
C[ii,:,:] = datnm.dot(datnm.T)
# Corresponding time
t[ii] = time[ti]
# Remove the time mean covariance
# Cnm = C - C.mean(axis=1, keepdims=True)
# Compute dominant changes to covariance
# [uC,sC,vC] = np.linalg.svd(Cnm, full_matrices=False)
return C,t
def reduce_space(data,nEOF):
"""
Projects a field (time, space, nens) onto its nEOF leading EOFs.
INPUTS
data Modal data matrix with dimensions (space,time,nens)
nEOF Number of EOFs retained
OUTPUTS
rbdor Reduced-space (eof index, time, nens) data matrix
ur EOFs used to project reduced-space back into full state
s Full vector of singular values
"""
# Change dimensions of dat from (time, space, nens) to (space,time,nens) to allow for reshaping
dats = np.transpose(data,(1,0,2))
[sd,td,nd] = dats.shape
# Reshape so that the second axis is a combination of time and ensemble dimensions.
# Default is 'C' indexing which will leave the time dimension intact.
datr = dats.reshape((sd,td*nd))
# Compute EOFs as a reduced basis
[u,s,vt] = np.linalg.svd(datr,full_matrices=False)
# This is the output in the reduced basis. Keep nEOF
rbd = (vt[:nEOF,:]*s[:nEOF,None]).T
# Reshape into original dimensions
rbdo = rbd.reshape(nEOF,td,nd)
# Reorder dimensions like the original
rbdor = np.transpose(rbdo,(1,0,2))
# These are the columns of u that are useful
ur = u[:,:nEOF]
return rbdor, ur, s
def KLdiv(C0,C1,m0,m1):
"""
Computes Kullback-Leibler divergence between two (full-rank) Gaussian processes with sample
covariances matrices C0 and C1 and sample means m0 and m1. C0 and C1 must be full rank.
See https://en.wikipedia.org/wiki/Kullback–Leibler_divergence
INPUTS
C0 (space, space) First covariance matrix
C1 (space, space) Second covariance matrix
m0 (space) First mean vector
m1 (space) Second mean vector
OUTPUTS
kld Kullback-Leibler divergence
"""
C1i = np.linalg.inv(C1)
kld = 1/2 * ( np.trace(C1i.dot(C0)) + (m0-m1).T.dot(C1i).dot(m0-m1) + np.log(np.linalg.det(C1)/np.linalg.det(C1)) )
return kld