forked from neelsoumya/rlib
-
Notifications
You must be signed in to change notification settings - Fork 0
/
commonfunc_ditched.stan
339 lines (303 loc) · 11.3 KB
/
commonfunc_ditched.stan
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
// ------------------------------------------------------------------------
// Randomized probability matching (RPM)
// ------------------------------------------------------------------------
// Disappointingly, in real use this fails with bad_alloc errors, meaning,
// I think, out of memory (on a 16 Gb machine).
// So possibly we need to preprocess it in R.
/*
NOTES ON NUMERIC INTEGRATION IN STAN.
See Chapters 20.3 and 45 of the Stan 2017 manual.
Generic signature for an ODE in Stan:
real[] rpmOde(real t, // time
real[] y, // system state
real[] theta, // parameters
real[] x_r, // real data
int[] x_i) // integer data
{
// to pass empty parameters use e.g. "real someparam[0]"
// ...
return dy_dt;
// ... derivatives with respect to time of the state
// ... length must match that of y
}
Call it with e.g.
real solution[,];
// ... 2D array; solutions at the specified times
// ... number of COLUMNS is the length of "initial_state" and "y"
// ... number of ROWS is the length of "times"
solution = integrate_ode_rk45(
ode, // function
initial_state, // real[], initial state; same length as "y" (state)
initial_time, // real or int, initial time
times, // real[], solution times
theta, // real[], parameters
x_r, // real[], real data
x_i, // int[], integer data
rel_tol, // OPTIONAL; real; relative tolerance for the ODE solver
abs_tol, // OPTIONAL; real; relative tolerance for the ODE solver
max_num_steps // OPTIONAL; int; max #steps to take in ODE solver
)
Looking at the source, I suspect that all of
y0, theta, x (= x_r), x_int (= x_i)
are constant, and "t" varies.
For comparison, the integrate() function in R:
integrate(f, lower, upper, ...)
... f takes a numeric first argument and returns a numeric vector of
the same length [e.g. f(x) returns dy/xy, and integrate(f, x0, x1)
gives us y, specifically the definite integral of f(x) = dy/dx from
x0 to x1.
... returns list x where x$value is the final estimate.
FAILURE: if we try to pack information into the "state" variable it gets
altered, and if we try to use theta, x_r, or x_i, Stan moans that we must
pass arguments that are "data only and [must] not reference parameters",
by which it means defined in the "data" block (not as local variables).
This is a limitation of the integrate_ode_*() functions.
More on numerical integration in Stan:
http://mc-stan.org/events/stancon2017-notebooks/stancon2017-margossian-gillespie-ode.html
https://github.com/stan-dev/stan/blob/develop/src/test/test-models/good/integrate_ode_rk45.stan
https://github.com/stan-dev/math/blob/develop/stan/math/prim/arr/functor/integrate_ode_rk45.hpp
Pretty difficult to get solution_times sorted!
"fourth argument to integrate_ode_rk45 (solution times) must be
data only and not reference parameters"
- https://groups.google.com/forum/#!topic/stan-users/5wZmNcdjzb4
- http://discourse.mc-stan.org/t/ode-integrator-calls-from-user-defined-functions/2043/4
We could, of course, build our own integrator...
*/
/*
real betaFunction(real alpha, real beta)
{
// Not actually used.
// Stan 2017 (v2.16.0) manual p445; see also pp 532, 601.
return exp(lbeta(alpha, beta));
}
*/
real dbeta(real x, real a, real b)
{
// Implementation of R's dbeta function.
// shape1 == a
// shape2 == b
// R's "dDIST" functions are PDFs; e.g. dnorm(0) = 0.3989.
//
// Tried this:
//
// d = exp(beta_lpdf(x | shape1, shape2)); // funny notation!
//
// ... but it fails under a number of circumstances.
//
// So: https://github.com/wch/r-source/blob/trunk/src/nmath/dbeta.c
// real d;
// real lval;
if (a < 0 || b < 0) {
reject("dbeta: Bad parameters");
}
if (x < 0 || x > 1) {
return 0; // R_D__0 means 0
}
// limit cases for (a,b), leading to point masses
if (a == 0 || b == 0 || is_inf(a) || is_inf(b)) {
if (a == 0 && b == 0) { // point mass 1/2 at each of {0,1} :
if (x == 0 || x == 1) {
return positive_infinity();
} else {
return 0;
}
}
if (a == 0 || a/b == 0) { // point mass 1 at 0
if (x == 0) {
return positive_infinity();
} else {
return 0;
}
}
if (b == 0 || b/a == 0) { // point mass 1 at 1
if (x == 1) {
return positive_infinity();
} else {
return 0;
}
}
// else, remaining case: a = b = Inf : point mass 1 at 1/2
if (x == 0.5) {
return positive_infinity();
} else {
return 0;
}
}
if (x == 0) {
if (a > 1) {
return 0;
}
if (a < 1) {
return positive_infinity();
}
/* a == 1 : */
// return(R_D_val(b));
// ... see below: empirically, dbeta(0, 1, b) -> b
return b;
}
if (x == 1) {
if (b > 1) {
return 0;
}
if (b < 1) {
return positive_infinity();
}
/* b == 1 : */
// return(R_D_val(a));
// ... defined in dpq.h as:
// #define R_D_val(x) (log_p ? log(x) : (x))
// ... and empirically, dbeta(1, a, 1) -> a
return a;
}
// Possibly now we can go back to Stan's?
// if (a <= 2 || b <= 2) {
// lval = (a - 1) * log(x) + (b - 1) * log1p(-x) - lbeta(a, b);
// } else {
// lval = log(a+b-1) + dbinom_raw(a-1, a+b-2, x, 1-x, TRUE);
// }
// d = exp(lval);
return exp(beta_lpdf(x | a, b)); // funny notation!
/*
d = exp(beta_lpdf(x | a, b)); // funny notation!
if (is_nan(d)) {
reject(
"dbeta failing:",
" x=", x,
", a=", a,
", b=", b,
", beta_lpdf(x | a, b)=", beta_lpdf(x | a, b),
", d=", d);
}
return d;
*/
}
real pbeta(real q, real shape1, real shape2)
{
// Implementation of R's pbeta function.
// q is a quantile.
// Regardless of the way R's help phrases is, "pDIST" functions are
// CUMULATIVE (PROBABILITY) DENSITY functions; e.g.
// pnorm(0) == 0.5; pnorm(-999) ~= 0; pnorm(999) ~= 1.
// The qnorm() and pnorm() functions reverse each other.
//
// shape1 == alpha
// shape2 == beta
// However, beta_cdf complains about values of 1 (probably meaning 1
// plus a tiny bit), whereas R's is more forgiving. We need that.
if (q >= 1) {
return 1;
}
if (q <= 0) {
return 0;
}
return beta_cdf(q, shape1, shape2);
// if (is_nan(p)) {
// reject("pbeta failing:",
// " q=", q,
// ", shape1=", shape1,
// ", shape2=", shape2,
// ", p=", p);
// }
// return p;
}
vector rpmDifferential(real x, vector wins, vector losses, int k)
{
// Internal function used by rpmComputeProbopt
vector[k] answer;
real r;
for (i in 1:k) {
r = dbeta(x, wins[i] + 1, losses[i] + 1);
for (j in 1:k) {
if (j != i) {
r = r * pbeta(x, wins[j] + 1, losses[j] + 1);
}
}
answer[i] = r;
/*
if (is_nan(r)) {
reject("rpmDifferential failing:",
" x=", x,
", wins=", wins,
", losses=", losses,
", r=", r);
}
*/
}
// print("rpmDifferential(",
// "x=", x,
// ", wins=", wins,
// ", losses=", losses,
// ") -> ", answer);
return answer;
}
vector rpmComputeProbopt(vector wins, // 'y' in Scott 2010; length 'k'
vector totals) // 'n' in Scott 2010; length 'k'
{
// Matches output from the R version compute.probopt in Scott 2010.
// This version uses the trapezium rule for integration.
int k = num_elements(wins);
vector[k] losses;
vector[k] optimum_probabilities;
// Constants used in the integration:
real x_start = 0.0; // lower limit of integral
real x_end = 1.0; // upper limit of integral
real x_span = x_end - x_start;
int N_int = 1000; // number of trapezoids
real N_real = N_int;
real dx = x_span / N_real;
// Working variables:
vector[k] f_x_previous;
vector[k] f_x;
real x;
if (num_elements(totals) != k) {
reject("rpmComputeProbopt num_elements(totals) != ",
"num_elements(wins) == k == ", k);
}
for (i in 1:k) {
losses[i] = totals[i] - wins[i];
if (wins[i] < 0) {
reject("rpmComputeProbopt: invalid data with wins < 0 at ",
"index ", i);
}
if (losses[i] < 0) {
reject("rpmComputeProbopt: invalid data with wins > total at ",
"index ", i);
}
optimum_probabilities[i] = 0.0;
}
f_x_previous = rpmDifferential(x_start, wins, losses, k); // value at x=0
x = x_start;
for (i in 1:N_int) {
// x = x_start + i * dx;
x = x + dx;
f_x = rpmDifferential(x, wins, losses, k);
optimum_probabilities = optimum_probabilities +
(f_x_previous + f_x); // trapezium area * 2 / dx
f_x_previous = f_x;
}
optimum_probabilities = optimum_probabilities * dx / 2.0;
// ... finally arriving at the trapezium rule
// ... more efficient to do the multiplication/division once only
// print("rpmComputeProbopt(",
// "wins=", wins,
// ", totals=", totals,
// ") -> ", optimum_probabilities);
return optimum_probabilities;
}
/*
// Test our RPM implementation:
vector[2] wins;
vector[2] totals;
vector[2] rpmprob;
wins[1] = 19;
wins[2] = 19;
totals[1] = 48;
totals[2] = 28;
// wins[1] = 5;
// wins[2] = 10;
// totals[1] = 12;
// totals[2] = 20;
rpmprob = rpmComputeProbopt(wins, totals);
print(rpmprob);
reject("done");
*/