// Stats_Fcns_Eigen.cpp
// case: when linking against Rmath as a stand-alone library
#ifdef MATHLIB_STANDALONE
#include <Eigen/Dense>
#include <stdexcept> // runtime_error
#include <R.h> // rPsort
#include <Rmath.h> // rnorm
/* Creates an alias so that the same R::rnorm() call works when linking against
* Rmath as a stand-alone library as when this code is called through R via
* Rcpp.
*/
namespace R {
inline double rnorm(double mu, double sigma) { return ::rnorm(mu, sigma); }
}
/* case: if not linking against Rmath as a stand-alone library, then assumes
* that we are linking against R via Rcpp
*/
#else
#include <RcppEigen.h>
#endif
#include <vector> // std::vector (used in quantile() below)
#define TOLERANCE 0.000000000001 // 1e-12
/* Generates values for beta where the number of nonzero values in beta is
* given by floor(prop_nonzero * p) and the nonzero entries are evenly spaced
* along the vector. The nonzero entries of beta are sampled from a
* N(0, true_beta_sd^2) distribution.
*/
Eigen::VectorXd sample_beta(int p, double prop_nonzero, double true_beta_sd) {
Eigen::VectorXd beta(p); // Allocate memory to store beta vector
int n_nonzero; // Number of nonzero elements of beta
double stepsize; // Fractional amount of zero entries between nonzero
double next_idx_dbl; // Fractional index of next nonzero entry
int next_idx_int; // Integer index of next nonzero entry
/* Note that in the following casts from double to int we need not consider
* overflow since p is known to be a representable number, and both
* prop_nonzero * p and next_idx_dbl are no bigger than p.
*/
n_nonzero = (int) (prop_nonzero * p);
stepsize = p / (double) (n_nonzero + 1);
next_idx_dbl = stepsize;
next_idx_int = (int) next_idx_dbl;
/* Each iteration sets the value of beta(j). A machine-error smudge factor
* (TOLERANCE) is added before truncating, mainly to prevent the last element
* from being erroneously treated as nonzero when the accumulated stepsize
* falls just short of p due to floating-point error.
*/
for (int j = 0; j < p; j++) {
/* case: a nonzero value. Sample beta(j). Next, calculate where the
* next value would be placed if we could evenly specify it on the real
* line, and then set the next index for a nonzero value to be the floor
* of the true evenly spaced value.
*/
if (j == next_idx_int) {
beta(j) = R::rnorm(0, true_beta_sd);
next_idx_dbl += stepsize;
next_idx_int = (int) (next_idx_dbl + TOLERANCE);
}
// case: a zero value for beta
else {
beta(j) = 0;
}
}
return beta;
}
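/* Worked example (illustrative only): with p = 10 and prop_nonzero = 0.3 we
* get n_nonzero = 3 and stepsize = 10 / 4 = 2.5, so the nonzero entries land
* at indices floor(2.5) = 2, floor(5.0) = 5, and floor(7.5) = 7; every other
* entry of beta is exactly zero.
*/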
/* Returns an nrow by ncol matrix such that the elements in the matrix are
* randomly sampled from independent N(0, sd^2) distributions.
*/
Eigen::MatrixXd matr_randn(int nrow, int ncol, double sd) {
Eigen::MatrixXd X(nrow, ncol);
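/* Eigen matrices are column-major by default, so iterating over the raw
* buffer fills X one column at a time; since the entries are i.i.d., the
* traversal order does not matter.
*/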
double* curr = X.data();
double* end = curr + X.size();
for ( ; curr < end; curr++) {
*curr = R::rnorm(0, sd);
}
return X;
}
/* Generates a random sample from a N(m, V) distribution using an eigen
* decomposition of V. Note that the function relies on the fact that
* Eigen::SelfAdjointEigenSolver provides the eigenvalues in increasing order.
*
* PRE: assumes that V is symmetric and positive-definite, and that m has the
* same number of elements as V has rows.
*/
Eigen::VectorXd mvrnorm_eigen(Eigen::VectorXd& m, Eigen::MatrixXd& V) {
int p = m.size();
Eigen::VectorXd eigval; // Store eigenvalues of V
Eigen::VectorXd root_eigval; // Store square root of e-values of V
Eigen::MatrixXd eigvec; // Store eigenvectors of V
// Perform eigenvalue decomposition
Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> eig_decomp(V);
if (eig_decomp.info() != Eigen::Success) {
throw std::runtime_error("Error in eigen decomposition");
}
eigval = eig_decomp.eigenvalues();
eigvec = eig_decomp.eigenvectors();
/* Calculate elementwise square root of eigenvalues. Ensure beforehand that
* eigenvalues that are negative due to machine error are set to zero.
*/
double* curr = eigval.data();
double* end = curr + p;
// Zero out any leading negative eigenvalues (negative only due to machine error)
for ( ; (curr != end) && (*curr < 0); curr++) {
*curr = 0;
}
root_eigval = eigval.array().sqrt();
/* Eigen decomposition provides the relation V = PDP^{T} where P is an
* orthogonal matrix where the columns are eigenvectors and D is a diagonal
* matrix composed of the corresponding eigenvalues. It follows that for
* independent normal r.v. z:
*
* Var[P D^{1/2} z] = P D^{1/2} Var[z] D^{1/2} P^{T}
* = P D^{1/2} I D^{1/2} P^{T}
* = PDP^{-1}
* = V
*/
return m + (eigvec * root_eigval.cwiseProduct(matr_randn(p, 1, 1)));
}
/* Generates a random sample from a N(m, V) distribution using a Cholesky
* decomposition of V.
*
* PRE: assumes that V is symmetric and positive-definite, and that m has the
* same number of elements as V has rows.
*/
Eigen::VectorXd mvrnorm_chol(Eigen::VectorXd& m, Eigen::MatrixXd& V) {
// Perform Cholesky decomposition
Eigen::LLT<Eigen::MatrixXd> R(V);
if (R.info() != Eigen::Success) {
throw std::runtime_error("Error in Cholesky decomposition");
}
/* Cholesky decomposition provides the relation V = RR^{T} so that for
* independent normal r.v. z:
*
* Var[Rz] = R Var[z] R^{T} = RIR^{T} = RR^{T} = V
*/
return m + (R.matrixL() * matr_randn(m.size(), 1, 1));
}
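/* Usage sketch for the two samplers (illustrative only; assumes the RNG has
* been seeded, e.g. via set_seed() in the stand-alone Rmath case or by R
* itself under Rcpp):
*
*     Eigen::VectorXd m = Eigen::VectorXd::Zero(3);
*     Eigen::MatrixXd V = Eigen::MatrixXd::Identity(3, 3);
*     Eigen::VectorXd x1 = mvrnorm_eigen(m, V); // eigendecomposition-based
*     Eigen::VectorXd x2 = mvrnorm_chol(m, V);  // Cholesky-based
*
* mvrnorm_chol is typically cheaper, while mvrnorm_eigen tolerates a V that is
* positive semi-definite only up to machine error thanks to the clamping of
* negative eigenvalues above.
*/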
/* Calculates the empirical quantiles of the data pointed to by values, where
* the k-th quantile is interpolated between the order statistics at indices
* quant_below_idx(k) and quant_below_idx(k) + 1 using the weight gamma(k) -
* see below for the definitions of quant_below_idx (corresponds to j) and
* gamma. The return object is the vector of quantiles.
*
* PRE: assumes that data_len >= 2, that quant_below_idx is nondecreasing, and
* that gamma and quant_below_idx have the same length.
*
* - - - - - - - - - - - - - - - - - - - - - - - - - -
*
* The quantile calculation part of this function is an implementation of the R
* function stats::quantile under the default choice of empirical quantile. See
* the help page for stats::quantile for more details (the default there is type
* 7). The formula is (for n the number of observations in the data):
*
* F^{-1}(prob) = ((1 - gamma) * x_j) + (gamma * x_{j+1})
*
* where
*
* j = floor(prob * (n - 1))
* gamma = (prob * (n - 1)) - floor(prob * (n - 1))
*/
Eigen::VectorXd quantile(double* values,
int data_len,
Eigen::VectorXi& quant_below_idx,
Eigen::VectorXd& gamma) {
int nprobs = quant_below_idx.size(); // num empirical quantiles
int curr_size = data_len; // num samples in data
std::vector<int> relative_below_idx(nprobs); // index relative to previous index
double* curr_start; // start of the current subset of the data
double* end; // 1 past the end of the data
double* kplus1; // 1 past the pivot position in the current subset
double* curr; // pointer to the current value
double* curr_min; // pointer to the current minimum
int below_idx; // largest index at or below the desired quantile position
int above_idx; // index one greater than below_idx
double temp; // used for data swap
Eigen::VectorXd out(nprobs); // storage for empirical quantiles
// Calculate initial starting location and absolute end of data
curr_start = values;
end = curr_start + data_len;
// Calculate indices relative to their previous index
relative_below_idx[0] = quant_below_idx(0);
for (int i = 1; i < nprobs; i++) {
relative_below_idx[i] = quant_below_idx(i) - quant_below_idx(i - 1);
}
/* Each iteration calculates the empirical quantile value of the k-th
* probability. The main task is to partially sort the data so that the
* values at index quant_below_idx(k) and at the index immediately following
* are the same as if the data had been fully sorted.
*
* This is done by partially sorting only the portion of the data at indices
* no smaller than the pivot of the previous iteration's partial sort.
*/
for (int k = 0; k < nprobs; k++) {
/* Partial sort about the k-th quantile index; i.e. the data is permuted so
* that every value at an index less than the k-th quantile index is no
* larger than the value there, and every value at a larger index is no
* smaller than the value there.
*/
rPsort(curr_start, curr_size, relative_below_idx[k]);
/* Find the smallest value among the elements after the pivot position,
* i.e. the order statistic with index quant_below_idx(k) + 1. We start
* curr_min at the element immediately after the pivot b/c every value at a
* smaller index is <= the value there.
*/
kplus1 = curr_start + relative_below_idx[k] + 1;
curr_min = kplus1;
for (curr = curr_min; curr < end; curr++) {
if (*curr < *curr_min) {
curr_min = curr;
}
}
// Swap the element immediately after the pivot with that smallest value so it occupies its fully sorted position
temp = *kplus1;
*kplus1 = *curr_min;
*curr_min = temp;
/* Calculate the empirical quantile value by linearly interpolating between
* the order statistics at below_idx and above_idx.
*/
below_idx = quant_below_idx(k);
above_idx = below_idx + 1;
out(k) = ((1 - gamma(k)) * values[below_idx]) + (gamma(k) * values[above_idx]);
// Restrict the subset of the data that still needs partial sorting
curr_start += relative_below_idx[k];
curr_size = end - curr_start;
}
return out;
}
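/* Worked example of the type-7 formula (illustrative only): for the data
* {4, 1, 3, 2, 5} (n = 5) and prob = 0.40, the desired fractional index is
* 0.40 * (5 - 1) = 1.6, so j = 1 and gamma = 0.6. The order statistics at
* indices 1 and 2 are 2 and 3, giving
*
*     F^{-1}(0.40) = (0.4 * 2) + (0.6 * 3) = 2.6,
*
* which matches quantile(x, 0.40, type = 7) in R.
*/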
/* Calculate the empirical quantiles for each column of X and quantile levels
* given by prob, and return them as elements of an Eigen::MatrixXd. The first
* column of the return object holds the true values of the variables, and the (j, k + 1)
* element of the return object is the quantile value of the j-th variable at
* the k-th quantile level.
*
* PRE: assumes that true_beta is the same length as the number of columns of X,
* that X has >= 2 rows of data, and that prob is nondecreasing and has values
* in (0, 1).
*/
Eigen::MatrixXd quantile_table(Eigen::VectorXd& true_beta,
Eigen::MatrixXd& X,
Eigen::VectorXd& prob) {
int p = X.cols(); // number of variables
int nprobs = prob.size(); // number of probabilities
int nsamp = X.rows(); // number of samples
double* col_start; // first element in X.col(j)
// Container for quantile values
Eigen::MatrixXd out(nprobs + 1, p);
// Quantile indices, interpolation vals ----------------
double desired; // fractional index at which the quantile falls
Eigen::VectorXi below_idx(nprobs); // closest index below desired point
Eigen::VectorXd gamma(nprobs); // linear interpolation value
/* Calculate data indices at which the quantiles are calculated and the
* corresponding linear interpolation values
*/
for (int k = 0; k < nprobs; k++) {
// Minus 1 b/c of 0-based indexing
desired = prob(k) * (nsamp - 1);
below_idx(k) = (int) desired;
gamma(k) = desired - below_idx(k);
}
// Fill in values of table -----------------------------
// Fill in true values of beta in first row
out.topRows(1) = true_beta.transpose();
/* Each iteration fills in rows 1 through nprobs of column j with the
* empirical quantile values of the j-th column of X
*/
col_start = X.data();
for (int j = 0; j < p; j++) {
// Fill in empirical quantile values
out.block(1, j, nprobs, 1) = quantile(col_start, nsamp, below_idx, gamma);
// Update col_start to point to first element of (j + 1)-th column
col_start += nsamp;
}
return out.transpose();
}
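/* Shape example (illustrative only): with p = 2 variables and
* prob = (0.1, 0.5, 0.9), the returned matrix is 2 x 4; column 0 holds the
* true beta values and columns 1-3 hold the 10%, 50%, and 90% empirical
* quantiles of the corresponding column of X.
*/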
/* Overloaded version of quantile_table that calculates the quantiles for a
* vector of data
*/
Eigen::MatrixXd quantile_table(double true_beta, Eigen::VectorXd& X, Eigen::VectorXd& prob) {
Eigen::VectorXd true_beta_vec(1);
Eigen::MatrixXd Xmat = X;
true_beta_vec << true_beta;
return quantile_table(true_beta_vec, Xmat, prob);
}
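/* Minimal stand-alone test harness (a sketch added for illustration, not part
* of the original build): compile with something like
*
*     g++ -DMATHLIB_STANDALONE -DSTATS_FCNS_DEMO -I/path/to/eigen \
*         Stats_Fcns_Eigen.cpp -lRmath
*
* The STATS_FCNS_DEMO macro is assumed here purely to keep the harness out of
* normal builds.
*/
#ifdef STATS_FCNS_DEMO
#include <iostream>
int main() {
set_seed(123, 456); // seed the stand-alone Rmath RNG
// Draw a sparse coefficient vector and a Gaussian design matrix
Eigen::VectorXd beta = sample_beta(10, 0.3, 2.0);
Eigen::MatrixXd X = matr_randn(500, 10, 1.0);
// Summarize each column of X with its 10%, 50%, and 90% empirical quantiles
Eigen::VectorXd prob(3);
prob << 0.1, 0.5, 0.9;
Eigen::MatrixXd tab = quantile_table(beta, X, prob);
std::cout << tab << std::endl;
return 0;
}
#endif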