-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTransitionMatrix.h
287 lines (256 loc) · 8.33 KB
/
TransitionMatrix.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
#ifndef TRANSITION_MATRIX_H
#define TRANSITION_MATRIX_H
#include <cstring>
#include <cmath>
#include <vector>
#include <bitset>
#include "MatrixSize.h"
#include "CompilerHints.h"
#include "CodonFrequencies.h"
#ifdef USE_MKL_VML
#include <mkl_vml_functions.h>
#endif
#ifdef USE_LAPACK
#include "blas.h"
#endif
/// The transition matrix plus its eigendecomposition.
///
/// @author Mario Valle - Swiss National Supercomputing Centre (CSCS)
/// @date 2011-02-23 (initial version)
/// @version 1.1
///
class TransitionMatrix {
public:
/// Constructor.
///
TransitionMatrix() {
// Initialize Q matrix to all zeroes (so only non-zero values are written)
#ifdef USE_LAPACK
memset(mS, 0, N * N * sizeof(double));
#else
memset(mQ, 0, N * N * sizeof(double));
#endif
// Initialize the codons' frequencies
CodonFrequencies *cf = CodonFrequencies::getInstance();
mCodonFreq = cf->getCodonFrequencies();
mNumGoodFreq = static_cast<int>(cf->getNumGoodCodons());
mSqrtCodonFreq = cf->getSqrtCodonFrequencies();
cf->cloneGoodCodonIndicators(mGoodFreq);
#ifdef FORCE_IDENTITY_MATRIX
// Fill the identity matrix (to be used when time is zero)
memset(mIdentity, 0, N * N * sizeof(double));
for (int i = 0; i < N; ++i)
mIdentity[i * (1 + N)] = 1.0;
#endif
}
/// Fill the Q (or the S) matrix and return the matrix scale value.
///
/// @param[in] aOmega The omega value.
/// @param[in] aK The k value.
///
/// @return The Q matrix scale value.
///
double fillMatrix(double aOmega, double aK);
/// Fill the Q (or the S) matrix and return the matrix scale value. Optimized
/// routine to be used for omega == 1
///
/// @param[in] aK The k value.
///
/// @return The Q matrix scale value.
///
double fillMatrix(double aK);
/// Compute the eigendecomposition of the Q matrix.
/// The used codon frequencies should be already loaded using
/// setCodonFrequencies()
/// The results are stored internally
///
void eigenQREV(void);
/// Store in an external matrix the result of exp(Q*t)
///
/// @param[out] aOut The matrix where the result should be stored (size: N*N)
/// under USE_LAPACK it is stored transposed
/// @param[in] aT The time to use in the computation (it is always > 0)
///
void computeFullTransitionMatrix(double *RESTRICT aOut, double aT) const {
#ifdef FORCE_IDENTITY_MATRIX
// if time is zero or almost zero, the transition matrix become an identity
// matrix
if (aT < 1e-100) {
memcpy(aOut, mIdentity, N * N * sizeof(double));
return;
}
#endif
#ifdef USE_LAPACK
// vdExp creates problems to one MPI implementation (even if the segfault seems
// harmless and happens after the program end)
#undef USE_MKL_VML
double ALIGN64
tmp[N * N64]; // The rows are padded to 64 to increase performance
double ALIGN64 expt[N];
double tm = aT / 2.;
#ifndef USE_MKL_VML
// Manual unrolling gives the best results here.
// So it is exp(D*T/2). Remember, the eigenvalues are stored in reverse
// order
for (int c = 0; c < N - 1;) {
expt[c] = exp(tm * mD[N - 1 - c]);
++c;
expt[c] = exp(tm * mD[N - 1 - c]);
++c;
expt[c] = exp(tm * mD[N - 1 - c]);
++c;
expt[c] = exp(tm * mD[N - 1 - c]);
++c;
expt[c] = exp(tm * mD[N - 1 - c]);
++c;
expt[c] = exp(tm * mD[N - 1 - c]);
++c;
}
expt[N - 1] = exp(tm * mD[0]);
for (int r = 0; r < N; ++r) {
for (int c = 0; c < N; ++c) {
tmp[r * N64 + c] = expt[c] * mV[r * N + c];
// tmp[r*N+c] = expt[c]*mV[r*N+c];
}
}
#else
// Manual unrolling gives the best results here.
// Remember, the eigenvalues are stored in reverse order
for (int j = 0; j < N - 1;) {
tmp[j] = tm * mD[N - 1 - j];
++j;
tmp[j] = tm * mD[N - 1 - j];
++j;
tmp[j] = tm * mD[N - 1 - j];
++j;
tmp[j] = tm * mD[N - 1 - j];
++j;
tmp[j] = tm * mD[N - 1 - j];
++j;
tmp[j] = tm * mD[N - 1 - j];
++j;
}
tmp[60] = tm * mD[0];
vdExp(N, tmp, expt);
for (int r = 0; r < N; ++r) {
vdMul(N, expt, &mV[r * N], &tmp[r * N64]);
// vdMul(N, expt, &mV[r*N], &tmp[r*N]);
}
#endif
dsyrk_("U", "T", &N, &N, &D1, tmp, &N64, &D0, aOut, &N);
// dsyrk_("U", "T", &N, &N, &D1, tmp, &N, &D0, aOut, &N);
#else
// The first iteration of the loop (k == 0) is split out to initialize aOut
double *p = aOut;
double expt =
exp(aT *
mD[N - 1]); // Remember, the eigenvalues are stored in reverse order
for (int i = 0; i < N; ++i) {
const double uexpt = mU[i * N] * expt;
for (int j = 0; j < N; ++j) {
*p++ = uexpt *mV[j];
}
}
// The subsequent iterations are computed normally
for (int k = 1; k < N; ++k) {
p = aOut;
expt = exp(aT * mD[N - 1 - k]); // Remember, the eigenvalues are stored in
// reverse order
for (int i = 0; i < N; ++i) {
const double uexpt = mU[i * N + k] * expt;
for (int j = 0; j < N; ++j) {
*p++ += uexpt *mV[k * N + j];
}
}
}
#endif
}
private:
/// Compute the eigendecomposition
///
/// @param[in,out] aU Matrix to be decomposed on input, Eigenvectors on output
/// @param[in] aDim The matrix dimension
/// @param[out] aR The eigenvalues
/// @param[out] aWork A working area used only for non lapack version
///
/// @exception std::range_error Error in EigenTridagQLImplicit (no lapack
/// used)
/// @exception FastCodeMLMemoryError Error sizing workareas
/// @exception std::range_error No convergence in dsyevr
///
void eigenRealSymm(double *RESTRICT aU, int aDim, double *RESTRICT aR,
double *RESTRICT aWork);
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4324) // Padding added due to alignment request
#endif
protected:
// Order suggested by icc to improve locality
// 'mV, mU, mSqrtCodonFreq, mNumGoodFreq, mQ, mD, mCodonFreq, mGoodFreq'
double ALIGN64 mV[N * N]; ///< The right adjusted eigenvectors matrix (with
/// the new method instead contains pi^1/2*R where R
/// are the eigenvectors)
double ALIGN64 mU[N * N]; ///< The left adjusted eigenvectors matrix
#ifdef FORCE_IDENTITY_MATRIX
double ALIGN64 mIdentity[N * N]; ///< Pre-filled identify matix
#endif
const double *
mSqrtCodonFreq; ///< Square root of experimental codon frequencies
int mNumGoodFreq; ///< Number of codons whose frequency is not zero (must be
/// int)
#ifndef USE_LAPACK
double ALIGN64 mQ[N * N]; ///< The Q matrix
#else
double ALIGN64 mS[N * N]; ///< The S matrix (remember Q = S*pi)
#endif
double ALIGN64 mD[N]; ///< The matrix eigenvalues stored in reverse order
const double *mCodonFreq; ///< Experimental codon frequencies
std::bitset<N>
mGoodFreq; ///< True if the corresponding codon frequency is not small
#ifdef _MSC_VER
#pragma warning(pop)
#endif
};
/// The transition matrix that can be saved ad restored afterwards.
///
/// @author Mario Valle - Swiss National Supercomputing Centre (CSCS)
/// @date 2012-09-07 (initial version)
/// @version 1.1
///
class CheckpointableTransitionMatrix : public TransitionMatrix {
public:
/// Constructor.
///
CheckpointableTransitionMatrix() : TransitionMatrix(), mSavedScale(1.) {}
/// Save a checkpoint of the matrices
///
/// @param[in] aScale The matrix scale to be saved
///
void saveCheckpoint(double aScale);
/// Restore the status at the last checkpoint
/// Note: no check if the saved status is valid
///
/// @return The original matrix scale
///
double restoreCheckpoint(void);
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4324) // Padding added due to alignment request
#endif
private:
double mSavedScale; ///< Saved matrix scale value
double ALIGN64 mSavedV[N * N]; ///< The right adjusted eigenvectors matrix
///(with the new method instead contains
/// pi^1/2*R where R are the eigenvectors)
double ALIGN64 mSavedU[N * N]; ///< The left adjusted eigenvectors matrix
#ifndef USE_LAPACK
double ALIGN64 mSavedQ[N * N]; ///< The Q matrix
#else
double ALIGN64 mSavedS[N * N]; ///< The S matrix (remember Q = S*pi)
#endif
double ALIGN64 mSavedD[N]; ///< The matrix eigenvalues stored in reverse order
#ifdef _MSC_VER
#pragma warning(pop)
#endif
};
#endif