From 55a9a037f677886cc113f809632b51c820685bfb Mon Sep 17 00:00:00 2001 From: Travis CI Date: Thu, 7 Dec 2023 16:42:11 +0000 Subject: [PATCH] Deploy to GitHub Pages: 5cecff8e74370652e8330cb41d4571cd5fae111f --- TMB_8hpp_source.html | 4 ++-- TMBad_2integrate_8hpp_source.html | 2 +- TMBad_2vectorize_8hpp_source.html | 2 +- TMBad_8cpp_source.html | 2 +- TMBad_8hpp_source.html | 2 +- ad__blas_8hpp_source.html | 2 +- atomic__macro_8hpp_source.html | 4 ++-- checkpoint_8hpp_source.html | 2 +- code__generator_8hpp_source.html | 2 +- compile_8hpp_source.html | 2 +- compression_8hpp_source.html | 2 +- eigen__numtraits_8hpp_source.html | 2 +- global_8hpp_source.html | 2 +- graph2dot_8hpp_source.html | 2 +- graph__transform_8hpp_source.html | 2 +- mask_8hpp_source.html | 2 +- tmb__enable__header__only_8hpp_source.html | 2 +- tmb__enable__precompile_8hpp_source.html | 2 +- tmbad__allow__comparison_8hpp_source.html | 2 +- tmbad__atomic__macro_8hpp_source.html | 2 +- 20 files changed, 22 insertions(+), 22 deletions(-) diff --git a/TMB_8hpp_source.html b/TMB_8hpp_source.html index e1ad7e029..d22ead486 100644 --- a/TMB_8hpp_source.html +++ b/TMB_8hpp_source.html @@ -73,11 +73,11 @@
TMB.hpp
-Go to the documentation of this file.
1 // Copyright (C) 2013-2015 Kasper Kristensen
2 // License: GPL-2
3 /* Utility: Compile time test for Type=double */
4 template<class Type>
5 struct isDouble{
6  enum{value=false};
7 };
8 template<>
9 struct isDouble<double>{
10  enum{value=true};
11 };
12 
16 /* To be removed */
17 #define TMB_DEBUG 0
18 #define TMB_PRINT(x)std::cout << #x << ": " << x << "\n"; std::cout.flush();
19 
20 /* Conditionally skip compilation */
21 #ifdef WITH_LIBTMB
22 #define CSKIP(...) ;
23 #define TMB_EXTERN extern
24 #else
25 #define CSKIP(...) __VA_ARGS__
26 #define TMB_EXTERN
27 #endif
28 #ifdef TMB_PRECOMPILE
29 #define IF_TMB_PRECOMPILE(...) __VA_ARGS__
30 #else
31 #define IF_TMB_PRECOMPILE(...)
32 #endif
33 
34 /* Must come before Rinternals.h */
35 #ifdef _OPENMP
36 #include <omp.h>
37 #endif
38 
39 /* Early inclusion of Rprintf and REprintf */
40 #include <R_ext/Print.h>
41 #include "Rstream.hpp"
42 
43 /* Flag to bypass abort() */
44 #ifndef TMB_ABORT
45 #define TMB_ABORT abort()
46 #endif
47 
48 /* Include the Eigen library. */
49 #ifdef TMB_SAFEBOUNDS
50 #undef NDEBUG
51 #undef eigen_assert
52 void eigen_REprintf(const char* x);
53 #define eigen_assert(x) if (!(x)) { eigen_REprintf("TMB has received an error from Eigen. "); \
54  eigen_REprintf("The following condition was not met:\n"); \
55  eigen_REprintf(#x); \
56  eigen_REprintf("\nPlease check your matrix-vector bounds etc., "); \
57  eigen_REprintf("or run your program through a debugger.\n"); \
58  TMB_ABORT;}
59 #define TMBAD_ASSERT2(x,msg) \
60 if (!(x)) { \
61  Rcerr << "TMBad assertion failed.\n"; \
62  Rcerr << "The following condition was not met: " << #x << "\n"; \
63  Rcerr << "Possible reason: " msg << "\n"; \
64  Rcerr << "For more info run your program through a debugger.\n"; \
65  TMB_ABORT; \
66 }
67 #define TMBAD_ASSERT(x) TMBAD_ASSERT2(x,"Unknown")
68 #else
69 #undef NDEBUG
70 #define NDEBUG 1
71 #define TMBAD_ASSERT2(x,msg) (void) (x);
72 #define TMBAD_ASSERT(x) (void) (x);
73 #endif
74 /* Provide access to file 'DisableStupidWarnings.h' which has been
75  patched by RcppEigen to satisfy CRAN policy. This file may need
76  regular updating. The renaming is to aviod a CRAN note. */
77 #ifdef TMB_EIGEN_DISABLE_WARNINGS
78 #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
79 #define EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS 1
80 #endif
81 #include "EigenWarnings/DisableStupidWarnings"
82 #endif
83 #include <Eigen/Dense>
84 
85 // Default: Include Eigen/Sparse normally
86 #ifndef TMB_SPARSE_STORAGE_INDEX
87 #include <Eigen/Sparse>
88 #else
89 // Alternative: Include Eigen/Sparse with custom sparse matrix integer type
90 #define SparseMatrix SparseMatrix_rename
91 #include <Eigen/Sparse>
92 #undef SparseMatrix
93 namespace Eigen {
94 template<class T, int Flags = 0, class StorageIndex = TMB_SPARSE_STORAGE_INDEX>
95 using SparseMatrix = SparseMatrix_rename<T, Flags, StorageIndex>;
96 }
97 #endif
98 
99 /* Workaround side effect when -DEIGEN_USE_LAPACKE is set */
100 #undef I
101 
102 /* Select AD framework: TMBAD or CPPAD */
103 #ifndef CPPAD_FRAMEWORK
104 #ifndef TMBAD_FRAMEWORK
105 #define CPPAD_FRAMEWORK
106 #endif
107 #endif
108 
109 /* Include the CppAD library. (Always turn off debug for cppad) */
110 #undef NDEBUG
111 #define NDEBUG 1
112 #include "cppad/cppad.hpp"
113 #ifdef TMBAD_FRAMEWORK
114 #include "TMBad/TMBad.hpp"
115 #include "TMBad/tmbad_allow_comparison.hpp"
116 #include "TMBad/eigen_numtraits.hpp"
117 #undef error
118 #include "TMBad/compile.hpp"
119 #include "TMBad/graph2dot.hpp"
120 #include "TMBad/compression.hpp"
121 #include "TMBad/ad_blas.hpp"
122 #ifndef WITH_LIBTMB
123 #include "TMBad/TMBad.cpp"
124 #endif
125 #define error Rf_error
126 // Workaround to make CppAD::Integer working with TMBad
127 namespace CppAD {
128 int Integer(const TMBad::ad_aug &x) CSKIP ({ return (int) x.Value(); })
129 TMBad::ad_aug abs(const TMBad::ad_aug &x) CSKIP ({ return TMBad::fabs(x); })
130 #define TMBAD_CONDEXP(NAME) \
131 TMBad::ad_aug CondExp ## NAME( \
132  const TMBad::ad_aug &x0, \
133  const TMBad::ad_aug &x1, \
134  const TMBad::ad_aug &x2, \
135  const TMBad::ad_aug &x3) CSKIP ( { \
136  return TMBad::CondExp ## NAME(x0, x1, x2, x3); \
137 })
138 TMBAD_CONDEXP(Eq)
139 TMBAD_CONDEXP(Ne)
140 TMBAD_CONDEXP(Lt)
141 TMBAD_CONDEXP(Gt)
142 TMBAD_CONDEXP(Le)
143 TMBAD_CONDEXP(Ge)
144 #undef TMBAD_CONDEXP
145 bool Variable(const TMBad::ad_aug &x) CSKIP ({ return !x.constant(); })
146 }
147 // FIXME: Move to TMBad source?
148 namespace TMBad {
149  /* Add 'isfinite', 'isinf' and 'isnan' to TMBad */
150  using std::isfinite;
151  bool isfinite(const TMBad::ad_aug &x)CSKIP({ return isfinite(x.Value()); })
152  using std::isinf;
153  bool isinf(const TMBad::ad_aug &x)CSKIP({ return isinf(x.Value()); })
154  using std::isnan;
155  bool isnan(const TMBad::ad_aug &x)CSKIP({ return isnan(x.Value()); })
156 }
157 #endif
158 
159 /* Include the R library _after_ Eigen and CppAD. Otherwise, the R
160  macros can cause conflicts (as they do not respect the Eigen and
161  CppAD namespace limits). E.g., the 'length' macro conflicts with
162  CppAD when compiling with '-std=c++11'. */
163 #include <R.h>
164 #include <Rinternals.h>
165 #include "toggle_thread_safe_R.hpp"
166 void eigen_REprintf(const char* x)CSKIP({REprintf("%s",x);})
167 
168 #include "tmbutils/tmbutils.hpp"
169 #include "tmbutils/vectorize.hpp"
170 using tmbutils::matrix;
171 using tmbutils::vector;
172 using CppAD::AD;
173 using CppAD::ADFun;
174 namespace CppAD{
175  /* Add to CppAD so that 'Variable' works for any 'Type' */
176  bool Variable(double x)CSKIP({ return false; })
177  /* Add 'isfinite', 'isinf' and 'isnan' to CppAD */
178  using std::isfinite;
179  template <class T>
180  bool isfinite(const AD<T> &x)CSKIP({ return isfinite(Value(x)); })
181  using std::isinf;
182  template <class T>
183  bool isinf(const AD<T> &x)CSKIP({ return isinf(Value(x)); })
184  using std::isnan;
185  template <class T>
186  bool isnan(const AD<T> &x)CSKIP({ return isnan(Value(x)); })
187 }
188 #include "convert.hpp" // asSEXP, asMatrix, asVector
189 #include "config.hpp"
190 #include "tmbutils/getListElement.hpp"
191 #include "atomic_math.hpp"
192 #include "expm.hpp"
193 #include "atomic_convolve.hpp"
194 #include "tiny_ad/atomic.hpp"
195 #include "tiny_ad/integrate/integrate.hpp"
196 #include "dynamic_data.hpp" // Requires atomic namespace
197 #include "Vectorize.hpp"
198 #include "dnorm.hpp" // harmless
199 #include "lgamma.hpp" // harmless
200 #include "start_parallel.hpp"
201 #include "tmbutils/newton.hpp" // Newton solver + Laplace used by TransformADFunObject
202 #include "tmb_core.hpp"
203 #include "distributions_R.hpp"
204 #include "convenience.hpp" // Requires besselK
205 #include "tmbutils/tmbutils_extra.hpp"
206 #include "tmbutils/R_inla.hpp"
207 #include "tmbutils/sparse_matrix_exponential.hpp"
208 #include "tmbutils/concat.hpp"
209 #include "precompile.hpp" // Must come last
210 using tmbutils::array;
211 using Eigen::Matrix;
212 using Eigen::Array;
213 
214 /* Cleanup */
215 
216 // Nothing more to precompile
217 #undef CSKIP
218 #define CSKIP(...) __VA_ARGS__
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+Go to the documentation of this file.
1 // Copyright (C) 2013-2015 Kasper Kristensen
2 // License: GPL-2
3 /* Utility: Compile time test for Type=double */
4 template<class Type>
5 struct isDouble{
6  enum{value=false};
7 };
8 template<>
9 struct isDouble<double>{
10  enum{value=true};
11 };
12 
16 /* To be removed */
17 #define TMB_DEBUG 0
18 #define TMB_PRINT(x)std::cout << #x << ": " << x << "\n"; std::cout.flush();
19 
20 /* Conditionally skip compilation */
21 #ifdef WITH_LIBTMB
22 #define CSKIP(...) ;
23 #define TMB_EXTERN extern
24 #else
25 #define CSKIP(...) __VA_ARGS__
26 #define TMB_EXTERN
27 #endif
28 #ifdef TMB_PRECOMPILE_ATOMICS
29 #define IF_TMB_PRECOMPILE_ATOMICS(...) __VA_ARGS__
30 #else
31 #define IF_TMB_PRECOMPILE_ATOMICS(...)
32 #endif
33 #ifdef HAVE_PRECOMPILED_ATOMICS
34 #define CSKIP_ATOMIC(...) ;
35 #else
36 #define CSKIP_ATOMIC(...) __VA_ARGS__
37 #endif
38 
39 /* Must come before Rinternals.h */
40 #ifdef _OPENMP
41 #include <omp.h>
42 #endif
43 
44 /* Early inclusion of Rprintf and REprintf */
45 #include <R_ext/Print.h>
46 #include "Rstream.hpp"
47 
48 /* Flag to bypass abort() */
49 #ifndef TMB_ABORT
50 #define TMB_ABORT abort()
51 #endif
52 
53 /* Include the Eigen library. */
54 #ifdef TMB_SAFEBOUNDS
55 #undef NDEBUG
56 #undef eigen_assert
57 void eigen_REprintf(const char* x);
58 #define eigen_assert(x) if (!(x)) { eigen_REprintf("TMB has received an error from Eigen. "); \
59  eigen_REprintf("The following condition was not met:\n"); \
60  eigen_REprintf(#x); \
61  eigen_REprintf("\nPlease check your matrix-vector bounds etc., "); \
62  eigen_REprintf("or run your program through a debugger.\n"); \
63  TMB_ABORT;}
64 #define TMBAD_ASSERT2(x,msg) \
65 if (!(x)) { \
66  Rcerr << "TMBad assertion failed.\n"; \
67  Rcerr << "The following condition was not met: " << #x << "\n"; \
68  Rcerr << "Possible reason: " msg << "\n"; \
69  Rcerr << "For more info run your program through a debugger.\n"; \
70  TMB_ABORT; \
71 }
72 #define TMBAD_ASSERT(x) TMBAD_ASSERT2(x,"Unknown")
73 #else
74 #undef NDEBUG
75 #define NDEBUG 1
76 #define TMBAD_ASSERT2(x,msg) (void) (x);
77 #define TMBAD_ASSERT(x) (void) (x);
78 #endif
79 /* Provide access to file 'DisableStupidWarnings.h' which has been
80  patched by RcppEigen to satisfy CRAN policy. This file may need
81  regular updating. The renaming is to aviod a CRAN note. */
82 #ifdef TMB_EIGEN_DISABLE_WARNINGS
83 #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
84 #define EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS 1
85 #endif
86 #include "EigenWarnings/DisableStupidWarnings"
87 #endif
88 #include <Eigen/Dense>
89 
90 // Default: Include Eigen/Sparse normally
91 #ifndef TMB_SPARSE_STORAGE_INDEX
92 #include <Eigen/Sparse>
93 #else
94 // Alternative: Include Eigen/Sparse with custom sparse matrix integer type
95 #define SparseMatrix SparseMatrix_rename
96 #include <Eigen/Sparse>
97 #undef SparseMatrix
98 namespace Eigen {
99 template<class T, int Flags = 0, class StorageIndex = TMB_SPARSE_STORAGE_INDEX>
100 using SparseMatrix = SparseMatrix_rename<T, Flags, StorageIndex>;
101 }
102 #endif
103 
104 /* Workaround side effect when -DEIGEN_USE_LAPACKE is set */
105 #undef I
106 
107 /* Select AD framework: TMBAD or CPPAD */
108 #ifndef CPPAD_FRAMEWORK
109 #ifndef TMBAD_FRAMEWORK
110 #define CPPAD_FRAMEWORK
111 #endif
112 #endif
113 
114 /* Include the CppAD library. (Always turn off debug for cppad) */
115 #undef NDEBUG
116 #define NDEBUG 1
117 #include "cppad/cppad.hpp"
118 #ifdef TMBAD_FRAMEWORK
119 #include "TMBad/TMBad.hpp"
120 #include "TMBad/tmbad_allow_comparison.hpp"
121 #include "TMBad/eigen_numtraits.hpp"
122 #undef error
123 #include "TMBad/compile.hpp"
124 #include "TMBad/graph2dot.hpp"
125 #include "TMBad/compression.hpp"
126 #include "TMBad/ad_blas.hpp"
127 #ifndef WITH_LIBTMB
128 #include "TMBad/TMBad.cpp"
129 #endif
130 #define error Rf_error
131 // Workaround to make CppAD::Integer working with TMBad
132 namespace CppAD {
133 int Integer(const TMBad::ad_aug &x) CSKIP ({ return (int) x.Value(); })
134 TMBad::ad_aug abs(const TMBad::ad_aug &x) CSKIP ({ return TMBad::fabs(x); })
135 #define TMBAD_CONDEXP(NAME) \
136 TMBad::ad_aug CondExp ## NAME( \
137  const TMBad::ad_aug &x0, \
138  const TMBad::ad_aug &x1, \
139  const TMBad::ad_aug &x2, \
140  const TMBad::ad_aug &x3) CSKIP ( { \
141  return TMBad::CondExp ## NAME(x0, x1, x2, x3); \
142 })
143 TMBAD_CONDEXP(Eq)
144 TMBAD_CONDEXP(Ne)
145 TMBAD_CONDEXP(Lt)
146 TMBAD_CONDEXP(Gt)
147 TMBAD_CONDEXP(Le)
148 TMBAD_CONDEXP(Ge)
149 #undef TMBAD_CONDEXP
150 bool Variable(const TMBad::ad_aug &x) CSKIP ({ return !x.constant(); })
151 }
152 // FIXME: Move to TMBad source?
153 namespace TMBad {
154  /* Add 'isfinite', 'isinf' and 'isnan' to TMBad */
155  using std::isfinite;
156  bool isfinite(const TMBad::ad_aug &x)CSKIP({ return isfinite(x.Value()); })
157  using std::isinf;
158  bool isinf(const TMBad::ad_aug &x)CSKIP({ return isinf(x.Value()); })
159  using std::isnan;
160  bool isnan(const TMBad::ad_aug &x)CSKIP({ return isnan(x.Value()); })
161 }
162 #endif
163 
164 /* Include the R library _after_ Eigen and CppAD. Otherwise, the R
165  macros can cause conflicts (as they do not respect the Eigen and
166  CppAD namespace limits). E.g., the 'length' macro conflicts with
167  CppAD when compiling with '-std=c++11'. */
168 #include <R.h>
169 #include <Rinternals.h>
170 #include "toggle_thread_safe_R.hpp"
171 void eigen_REprintf(const char* x)CSKIP({REprintf("%s",x);})
172 
173 #include "tmbutils/tmbutils.hpp"
174 #include "tmbutils/vectorize.hpp"
175 using tmbutils::matrix;
176 using tmbutils::vector;
177 using CppAD::AD;
178 using CppAD::ADFun;
179 namespace CppAD{
180  /* Add to CppAD so that 'Variable' works for any 'Type' */
181  bool Variable(double x)CSKIP({ return false; })
182  /* Add 'isfinite', 'isinf' and 'isnan' to CppAD */
183  using std::isfinite;
184  template <class T>
185  bool isfinite(const AD<T> &x)CSKIP({ return isfinite(Value(x)); })
186  using std::isinf;
187  template <class T>
188  bool isinf(const AD<T> &x)CSKIP({ return isinf(Value(x)); })
189  using std::isnan;
190  template <class T>
191  bool isnan(const AD<T> &x)CSKIP({ return isnan(Value(x)); })
192 }
193 #include "convert.hpp" // asSEXP, asMatrix, asVector
194 #include "config.hpp"
195 #include "tmbutils/getListElement.hpp"
196 #include "atomic_math.hpp"
197 #include "expm.hpp"
198 #include "atomic_convolve.hpp"
199 #include "tiny_ad/atomic.hpp"
200 #include "tiny_ad/integrate/integrate.hpp"
201 #include "dynamic_data.hpp" // Requires atomic namespace
202 #include "Vectorize.hpp"
203 #include "dnorm.hpp" // harmless
204 #include "lgamma.hpp" // harmless
205 #include "start_parallel.hpp"
206 #include "tmbutils/newton.hpp" // Newton solver + Laplace used by TransformADFunObject
207 #include "tmb_core.hpp"
208 #include "distributions_R.hpp"
209 #include "convenience.hpp" // Requires besselK
210 #include "tmbutils/tmbutils_extra.hpp"
211 #include "tmbutils/R_inla.hpp"
212 #include "tmbutils/sparse_matrix_exponential.hpp"
213 #include "tmbutils/concat.hpp"
214 #include "precompile.hpp" // Must come last
215 using tmbutils::array;
216 using Eigen::Matrix;
217 using Eigen::Array;
218 
219 /* Cleanup */
220 
221 // Nothing more to precompile
222 #undef CSKIP
223 #define CSKIP(...) __VA_ARGS__
224 #undef CSKIP_ATOMIC
225 #define CSKIP_ATOMIC(...) __VA_ARGS__
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
Array class used by TMB.
Definition: tmbutils.hpp:23
Augmented AD type.
Definition: global.hpp:2831
-
Definition: TMB.hpp:127
+
Definition: TMB.hpp:132
Scalar Value() const
Return the underlying scalar value of this ad_aug.
Definition: TMBad.cpp:2188
Matrix class used by TMB.
Definition: tmbutils.hpp:102
Vector class used by TMB.
Definition: tmbutils.hpp:18
diff --git a/TMBad_2integrate_8hpp_source.html b/TMBad_2integrate_8hpp_source.html index d55183680..b3cbf3686 100644 --- a/TMBad_2integrate_8hpp_source.html +++ b/TMBad_2integrate_8hpp_source.html @@ -73,7 +73,7 @@
TMBad/integrate.hpp
-
1 #ifndef HAVE_INTEGRATE_HPP
2 #define HAVE_INTEGRATE_HPP
3 // Autogenerated - do not edit by hand !
4 #include <float.h> // INFINITY etc
5 #include "global.hpp"
6 
7 namespace TMBad {
8 
14 template <class T>
15 double value(T x) {
16  return TMBad::Value(x);
17 }
18 double value(double x);
19 template <class S, class T>
20 int imin2(S x, T y) {
21  return (x < y) ? x : y;
22 }
23 template <class S, class T>
24 double fmin2(S x, T y) {
25  return (value(x) < value(y)) ? value(x) : value(y);
26 }
27 template <class S, class T>
28 double fmax2(S x, T y) {
29  return (value(x) < value(y)) ? value(y) : value(x);
30 }
31 template <class Float, class integr_fn>
32 static void rdqagie(integr_fn f, void *ex, Float *, int *, Float *, Float *,
33  int *, Float *, Float *, int *, int *, Float *, Float *,
34  Float *, Float *, int *, int *);
35 
36 template <class Float, class integr_fn>
37 static void rdqk15i(integr_fn f, void *ex, Float *, int *, Float *, Float *,
38  Float *, Float *, Float *, Float *);
39 
40 template <class Float, class integr_fn>
41 static void rdqagse(integr_fn f, void *ex, Float *, Float *, Float *, Float *,
42  int *, Float *, Float *, int *, int *, Float *, Float *,
43  Float *, Float *, int *, int *);
44 
45 template <class Float, class integr_fn>
46 static void rdqk21(integr_fn f, void *ex, Float *, Float *, Float *, Float *,
47  Float *, Float *);
48 
49 template <class Float>
50 static void rdqpsrt(int *, int *, int *, Float *, Float *, int *, int *);
51 
52 template <class Float>
53 static void rdqelg(int *, Float *, Float *, Float *, Float *, int *);
54 
55 template <class Float, class integr_fn>
56 void Rdqagi(integr_fn f, void *ex, Float *bound, int *inf, Float *epsabs,
57  Float *epsrel, Float *result, Float *abserr, int *neval, int *ier,
58  int *limit, int *lenw, int *last, int *iwork, Float *work) {
59  int l1, l2, l3;
60  *ier = 6;
61  *neval = 0;
62  *last = 0;
63  *result = 0.;
64  *abserr = 0.;
65  if (*limit < 1 || *lenw < *limit << 2) return;
66 
67  l1 = *limit;
68  l2 = *limit + l1;
69  l3 = *limit + l2;
70 
71  rdqagie(f, ex, bound, inf, epsabs, epsrel, limit, result, abserr, neval, ier,
72  work, &work[l1], &work[l2], &work[l3], iwork, last);
73 
74  return;
75 }
76 
77 template <class Float, class integr_fn>
78 static void rdqagie(integr_fn f, void *ex, Float *bound, int *inf,
79  Float *epsabs, Float *epsrel, int *limit, Float *result,
80  Float *abserr, int *neval, int *ier, Float *alist,
81  Float *blist, Float *rlist, Float *elist, int *iord,
82  int *last) {
83  Float area, dres;
84  int ksgn;
85  Float boun;
86  int nres;
87  Float area1, area2, area12;
88  int k;
89  Float small = 0.0, erro12;
90  int ierro;
91  Float a1, a2, b1, b2, defab1, defab2, oflow;
92  int ktmin, nrmax;
93  Float uflow;
94  bool noext;
95  int iroff1, iroff2, iroff3;
96  Float res3la[3], error1, error2;
97  int id;
98  Float rlist2[52];
99  int numrl2;
100  Float defabs, epmach, erlarg = 0.0, abseps, correc = 0.0, errbnd, resabs;
101  int jupbnd;
102  Float erlast, errmax;
103  int maxerr;
104  Float reseps;
105  bool extrap;
106  Float ertest = 0.0, errsum;
107  --iord;
108  --elist;
109  --rlist;
110  --blist;
111  --alist;
112 
113  epmach = DBL_EPSILON;
114 
115  *ier = 0;
116  *neval = 0;
117  *last = 0;
118  *result = 0.;
119  *abserr = 0.;
120  alist[1] = 0.;
121  blist[1] = 1.;
122  rlist[1] = 0.;
123  elist[1] = 0.;
124  iord[1] = 0;
125  if (*epsabs <= 0. && (*epsrel < fmax2(epmach * 50., 5e-29))) *ier = 6;
126  if (*ier == 6) return;
127  boun = *bound;
128  if (*inf == 2) {
129  boun = 0.;
130  }
131 
132  static Float c_b6 = 0.;
133  static Float c_b7 = 1.;
134 
135  rdqk15i(f, ex, &boun, inf, &c_b6, &c_b7, result, abserr, &defabs, &resabs);
136 
137  *last = 1;
138  rlist[1] = *result;
139  elist[1] = *abserr;
140  iord[1] = 1;
141  dres = fabs(*result);
142  errbnd = fmax2(*epsabs, *epsrel * dres);
143  if (*abserr <= epmach * 100. * defabs && *abserr > errbnd) *ier = 2;
144  if (*limit == 1) *ier = 1;
145  if (*ier != 0 || (*abserr <= errbnd && *abserr != resabs) || *abserr == 0.)
146  goto L130;
147 
148  uflow = DBL_MIN;
149  oflow = DBL_MAX;
150  rlist2[0] = *result;
151  errmax = *abserr;
152  maxerr = 1;
153  area = *result;
154  errsum = *abserr;
155  *abserr = oflow;
156  nrmax = 1;
157  nres = 0;
158  ktmin = 0;
159  numrl2 = 2;
160  extrap = false;
161  noext = false;
162  ierro = 0;
163  iroff1 = 0;
164  iroff2 = 0;
165  iroff3 = 0;
166  ksgn = -1;
167  if (dres >= (1. - epmach * 50.) * defabs) {
168  ksgn = 1;
169  }
170 
171  for (*last = 2; *last <= *limit; ++(*last)) {
172  a1 = alist[maxerr];
173  b1 = (alist[maxerr] + blist[maxerr]) * .5;
174  a2 = b1;
175  b2 = blist[maxerr];
176  erlast = errmax;
177  rdqk15i(f, ex, &boun, inf, &a1, &b1, &area1, &error1, &resabs, &defab1);
178  rdqk15i(f, ex, &boun, inf, &a2, &b2, &area2, &error2, &resabs, &defab2);
179 
180  area12 = area1 + area2;
181  erro12 = error1 + error2;
182  errsum = errsum + erro12 - errmax;
183  area = area + area12 - rlist[maxerr];
184  if (!(defab1 == error1 || defab2 == error2)) {
185  if (fabs(rlist[maxerr] - area12) <= fabs(area12) * 1e-5 &&
186  erro12 >= errmax * .99) {
187  if (extrap)
188  ++iroff2;
189  else
190  ++iroff1;
191  }
192  if (*last > 10 && erro12 > errmax) ++iroff3;
193  }
194 
195  rlist[maxerr] = area1;
196  rlist[*last] = area2;
197  errbnd = fmax2(*epsabs, *epsrel * fabs(area));
198 
199  if (iroff1 + iroff2 >= 10 || iroff3 >= 20) *ier = 2;
200  if (iroff2 >= 5) ierro = 3;
201 
202  if (*last == *limit) *ier = 1;
203 
204  if (fmax2(fabs(a1), fabs(b2)) <=
205  (epmach * 100. + 1.) * (fabs(a2) + uflow * 1e3)) {
206  *ier = 4;
207  }
208 
209  if (error2 <= error1) {
210  alist[*last] = a2;
211  blist[maxerr] = b1;
212  blist[*last] = b2;
213  elist[maxerr] = error1;
214  elist[*last] = error2;
215  } else {
216  alist[maxerr] = a2;
217  alist[*last] = a1;
218  blist[*last] = b1;
219  rlist[maxerr] = area2;
220  rlist[*last] = area1;
221  elist[maxerr] = error2;
222  elist[*last] = error1;
223  }
224 
225  rdqpsrt(limit, last, &maxerr, &errmax, &elist[1], &iord[1], &nrmax);
226  if (errsum <= errbnd) {
227  goto L115;
228  }
229  if (*ier != 0) break;
230  if (*last == 2) {
231  small = .375;
232  erlarg = errsum;
233  ertest = errbnd;
234  rlist2[1] = area;
235  continue;
236  }
237  if (noext) continue;
238 
239  erlarg -= erlast;
240  if (fabs(b1 - a1) > small) {
241  erlarg += erro12;
242  }
243  if (!extrap) {
244  if (fabs(blist[maxerr] - alist[maxerr]) > small) {
245  continue;
246  }
247  extrap = true;
248  nrmax = 2;
249  }
250 
251  if (ierro != 3 && erlarg > ertest) {
252  id = nrmax;
253  jupbnd = *last;
254  if (*last > *limit / 2 + 2) {
255  jupbnd = *limit + 3 - *last;
256  }
257  for (k = id; k <= jupbnd; ++k) {
258  maxerr = iord[nrmax];
259  errmax = elist[maxerr];
260  if (fabs(blist[maxerr] - alist[maxerr]) > small) {
261  goto L90;
262  }
263  ++nrmax;
264  }
265  }
266 
267  ++numrl2;
268  rlist2[numrl2 - 1] = area;
269  rdqelg(&numrl2, rlist2, &reseps, &abseps, res3la, &nres);
270  ++ktmin;
271  if (ktmin > 5 && *abserr < errsum * .001) {
272  *ier = 5;
273  }
274  if (abseps >= *abserr) {
275  goto L70;
276  }
277  ktmin = 0;
278  *abserr = abseps;
279  *result = reseps;
280  correc = erlarg;
281  ertest = fmax2(*epsabs, *epsrel * fabs(reseps));
282  if (*abserr <= ertest) {
283  break;
284  }
285 
286  L70:
287  if (numrl2 == 1) {
288  noext = true;
289  }
290  if (*ier == 5) {
291  break;
292  }
293  maxerr = iord[1];
294  errmax = elist[maxerr];
295  nrmax = 1;
296  extrap = false;
297  small *= .5;
298  erlarg = errsum;
299  L90:;
300  }
301 
302  if (*abserr == oflow) {
303  goto L115;
304  }
305  if (*ier + ierro == 0) {
306  goto L110;
307  }
308  if (ierro == 3) {
309  *abserr += correc;
310  }
311  if (*ier == 0) {
312  *ier = 3;
313  }
314  if (*result == 0. || area == 0.) {
315  if (*abserr > errsum) goto L115;
316 
317  if (area == 0.) goto L130;
318  } else {
319  if (*abserr / fabs(*result) > errsum / fabs(area)) {
320  goto L115;
321  }
322  }
323 
324 L110:
325  if (ksgn == -1 && fmax2(fabs(*result), fabs(area)) <= defabs * .01) {
326  goto L130;
327  }
328  if (.01 > *result / area || *result / area > 100. || errsum > fabs(area)) {
329  *ier = 6;
330  }
331  goto L130;
332 
333 L115:
334  *result = 0.;
335  for (k = 1; k <= *last; ++k) *result += rlist[k];
336 
337  *abserr = errsum;
338 L130:
339  *neval = *last * 30 - 15;
340  if (*inf == 2) {
341  *neval <<= 1;
342  }
343  if (*ier > 2) {
344  --(*ier);
345  }
346  return;
347 }
348 
349 template <class Float, class integr_fn>
350 void Rdqags(integr_fn f, void *ex, Float *a, Float *b, Float *epsabs,
351  Float *epsrel, Float *result, Float *abserr, int *neval, int *ier,
352  int *limit, int *lenw, int *last, int *iwork, Float *work) {
353  int l1, l2, l3;
354  *ier = 6;
355  *neval = 0;
356  *last = 0;
357  *result = 0.;
358  *abserr = 0.;
359  if (*limit < 1 || *lenw < *limit * 4) return;
360 
361  l1 = *limit;
362  l2 = *limit + l1;
363  l3 = *limit + l2;
364 
365  rdqagse(f, ex, a, b, epsabs, epsrel, limit, result, abserr, neval, ier, work,
366  &work[l1], &work[l2], &work[l3], iwork, last);
367 
368  return;
369 }
370 
371 template <class Float, class integr_fn>
372 static void rdqagse(integr_fn f, void *ex, Float *a, Float *b, Float *epsabs,
373  Float *epsrel, int *limit, Float *result, Float *abserr,
374  int *neval, int *ier, Float *alist, Float *blist,
375  Float *rlist, Float *elist, int *iord, int *last) {
376  bool noext, extrap;
377  int k, ksgn, nres;
378  int ierro;
379  int ktmin, nrmax;
380  int iroff1, iroff2, iroff3;
381  int id;
382  int numrl2;
383  int jupbnd;
384  int maxerr;
385  Float res3la[3];
386  Float rlist2[52];
387  Float abseps, area, area1, area2, area12, dres, epmach;
388  Float a1, a2, b1, b2, defabs, defab1, defab2, oflow, uflow, resabs, reseps;
389  Float error1, error2, erro12, errbnd, erlast, errmax, errsum;
390 
391  Float correc = 0.0, erlarg = 0.0, ertest = 0.0, small = 0.0;
392  --iord;
393  --elist;
394  --rlist;
395  --blist;
396  --alist;
397 
398  epmach = DBL_EPSILON;
399 
400  *ier = 0;
401  *neval = 0;
402  *last = 0;
403  *result = 0.;
404  *abserr = 0.;
405  alist[1] = *a;
406  blist[1] = *b;
407  rlist[1] = 0.;
408  elist[1] = 0.;
409  if (*epsabs <= 0. && *epsrel < fmax2(epmach * 50., 5e-29)) {
410  *ier = 6;
411  return;
412  }
413 
414  uflow = DBL_MIN;
415  oflow = DBL_MAX;
416  ierro = 0;
417  rdqk21(f, ex, a, b, result, abserr, &defabs, &resabs);
418 
419  dres = fabs(*result);
420  errbnd = fmax2(*epsabs, *epsrel * dres);
421  *last = 1;
422  rlist[1] = *result;
423  elist[1] = *abserr;
424  iord[1] = 1;
425  if (*abserr <= epmach * 100. * defabs && *abserr > errbnd) *ier = 2;
426  if (*limit == 1) *ier = 1;
427  if (*ier != 0 || (*abserr <= errbnd && *abserr != resabs) || *abserr == 0.)
428  goto L140;
429 
430  rlist2[0] = *result;
431  errmax = *abserr;
432  maxerr = 1;
433  area = *result;
434  errsum = *abserr;
435  *abserr = oflow;
436  nrmax = 1;
437  nres = 0;
438  numrl2 = 2;
439  ktmin = 0;
440  extrap = false;
441  noext = false;
442  iroff1 = 0;
443  iroff2 = 0;
444  iroff3 = 0;
445  ksgn = -1;
446  if (dres >= (1. - epmach * 50.) * defabs) {
447  ksgn = 1;
448  }
449 
450  for (*last = 2; *last <= *limit; ++(*last)) {
451  a1 = alist[maxerr];
452  b1 = (alist[maxerr] + blist[maxerr]) * .5;
453  a2 = b1;
454  b2 = blist[maxerr];
455  erlast = errmax;
456  rdqk21(f, ex, &a1, &b1, &area1, &error1, &resabs, &defab1);
457  rdqk21(f, ex, &a2, &b2, &area2, &error2, &resabs, &defab2);
458 
459  area12 = area1 + area2;
460  erro12 = error1 + error2;
461  errsum = errsum + erro12 - errmax;
462  area = area + area12 - rlist[maxerr];
463  if (!(defab1 == error1 || defab2 == error2)) {
464  if (fabs(rlist[maxerr] - area12) <= fabs(area12) * 1e-5 &&
465  erro12 >= errmax * .99) {
466  if (extrap)
467  ++iroff2;
468  else
469  ++iroff1;
470  }
471  if (*last > 10 && erro12 > errmax) ++iroff3;
472  }
473  rlist[maxerr] = area1;
474  rlist[*last] = area2;
475  errbnd = fmax2(*epsabs, *epsrel * fabs(area));
476 
477  if (iroff1 + iroff2 >= 10 || iroff3 >= 20) *ier = 2;
478  if (iroff2 >= 5) ierro = 3;
479 
480  if (*last == *limit) *ier = 1;
481 
482  if (fmax2(fabs(a1), fabs(b2)) <=
483  (epmach * 100. + 1.) * (fabs(a2) + uflow * 1e3)) {
484  *ier = 4;
485  }
486 
487  if (error2 > error1) {
488  alist[maxerr] = a2;
489  alist[*last] = a1;
490  blist[*last] = b1;
491  rlist[maxerr] = area2;
492  rlist[*last] = area1;
493  elist[maxerr] = error2;
494  elist[*last] = error1;
495  } else {
496  alist[*last] = a2;
497  blist[maxerr] = b1;
498  blist[*last] = b2;
499  elist[maxerr] = error1;
500  elist[*last] = error2;
501  }
502 
503  rdqpsrt(limit, last, &maxerr, &errmax, &elist[1], &iord[1], &nrmax);
504 
505  if (errsum <= errbnd) goto L115;
506  if (*ier != 0) break;
507  if (*last == 2) {
508  small = fabs(*b - *a) * .375;
509  erlarg = errsum;
510  ertest = errbnd;
511  rlist2[1] = area;
512  continue;
513  }
514  if (noext) continue;
515 
516  erlarg -= erlast;
517  if (fabs(b1 - a1) > small) {
518  erlarg += erro12;
519  }
520  if (!extrap) {
521  if (fabs(blist[maxerr] - alist[maxerr]) > small) {
522  continue;
523  }
524  extrap = true;
525  nrmax = 2;
526  }
527 
528  if (ierro != 3 && erlarg > ertest) {
529  id = nrmax;
530  jupbnd = *last;
531  if (*last > *limit / 2 + 2) {
532  jupbnd = *limit + 3 - *last;
533  }
534  for (k = id; k <= jupbnd; ++k) {
535  maxerr = iord[nrmax];
536  errmax = elist[maxerr];
537  if (fabs(blist[maxerr] - alist[maxerr]) > small) {
538  goto L90;
539  }
540  ++nrmax;
541  }
542  }
543 
544  ++numrl2;
545  rlist2[numrl2 - 1] = area;
546  rdqelg(&numrl2, rlist2, &reseps, &abseps, res3la, &nres);
547  ++ktmin;
548  if (ktmin > 5 && *abserr < errsum * .001) {
549  *ier = 5;
550  }
551  if (abseps < *abserr) {
552  ktmin = 0;
553  *abserr = abseps;
554  *result = reseps;
555  correc = erlarg;
556  ertest = fmax2(*epsabs, *epsrel * fabs(reseps));
557  if (*abserr <= ertest) {
558  break;
559  }
560  }
561 
562  if (numrl2 == 1) {
563  noext = true;
564  }
565  if (*ier == 5) {
566  break;
567  }
568  maxerr = iord[1];
569  errmax = elist[maxerr];
570  nrmax = 1;
571  extrap = false;
572  small *= .5;
573  erlarg = errsum;
574  L90:;
575  }
576 
577  if (*abserr == oflow) goto L115;
578  if (*ier + ierro == 0) goto L110;
579  if (ierro == 3) *abserr += correc;
580  if (*ier == 0) *ier = 3;
581  if (*result == 0. || area == 0.) {
582  if (*abserr > errsum) goto L115;
583  if (area == 0.) goto L130;
584  } else {
585  if (*abserr / fabs(*result) > errsum / fabs(area)) goto L115;
586  }
587 
588 L110:
589  if (ksgn == -1 && fmax2(fabs(*result), fabs(area)) <= defabs * .01) {
590  goto L130;
591  }
592  if (.01 > *result / area || *result / area > 100. || errsum > fabs(area)) {
593  *ier = 5;
594  }
595  goto L130;
596 
597 L115:
598  *result = 0.;
599  for (k = 1; k <= *last; ++k) *result += rlist[k];
600  *abserr = errsum;
601 L130:
602  if (*ier > 2)
603  L140:
604  *neval = *last * 42 - 21;
605  return;
606 }
607 
// rdqk15i: apply one 15-node quadrature rule on the sub-interval [*a, *b] of
// the transformed variable used for (semi-)infinite integration ranges.
// NOTE(review): looks like a port of QUADPACK's dqk15i - confirm upstream.
//
//   f       callback invoked as f(x, n, ex); the values it leaves in x[] are
//           read back below as integrand values
//   ex      opaque pointer forwarded to f
//   boun    finite end point used in the variable transformation
//   inf     range selector; when *inf == 2 the integrand is also evaluated at
//           the negated abscissae and both contributions are added
//   a, b    limits of the sub-interval in the transformed variable
//   result  (out) resk * hlgth
//   abserr  (out) error estimate based on |resk - resg| * hlgth
//   resabs  (out) weighted sum of absolute integrand values, times hlgth
//   resasc  (out) weighted sum of |value - resk / 2|, times hlgth
608 template <class Float, class integr_fn>
609 static void rdqk15i(integr_fn f, void *ex, Float *boun, int *inf, Float *a,
610  Float *b, Float *result, Float *abserr, Float *resabs,
611  Float *resasc) {
// Tabulated weights (wg, wgk) and abscissae (xgk). wg is zero in every other
// slot, so only half of the off-centre nodes contribute to resg.
612  static double wg[8] = {0., .129484966168869693270611432679082,
613  0., .27970539148927666790146777142378,
614  0., .381830050505118944950369775488975,
615  0., .417959183673469387755102040816327};
616  static double xgk[8] = {
617  .991455371120812639206854697526329, .949107912342758524526189684047851,
618  .864864423359769072789712788640926, .741531185599394439863864773280788,
619  .58608723546769113029414483825873, .405845151377397166906606412076961,
620  .207784955007898467600689403773245, 0.};
621  static double wgk[8] = {
622  .02293532201052922496373200805897, .063092092629978553290700663189204,
623  .104790010322250183839876322541518, .140653259715525918745189590510238,
624  .16900472663926790282658342659855, .190350578064785409913256402421014,
625  .204432940075298892414161999234649, .209482141084727828012999174891714};
626 
627  Float absc, dinf, resg, resk, fsum, absc1, absc2, fval1, fval2;
628  int j;
629  Float hlgth, centr, reskh, uflow;
630  Float tabsc1, tabsc2, fc, epmach;
631  Float fv1[7], fv2[7], vec[15], vec2[15];
632  epmach = DBL_EPSILON;
633  uflow = DBL_MIN;
// dinf is min(1, *inf): -1 for *inf == -1 and +1 for *inf == 1 or 2.
634  dinf = (double)imin2(1, *inf);
635 
636  centr = (*a + *b) * .5;
637  hlgth = (*b - *a) * .5;
// Map the centre and the 7 symmetric node pairs through
// t -> *boun + dinf * (1 - t) / t and collect them in vec (and, for
// *inf == 2, their negatives in vec2) so f is called once per array.
638  tabsc1 = *boun + dinf * (1. - centr) / centr;
639  vec[0] = tabsc1;
640  if (*inf == 2) {
641  vec2[0] = -tabsc1;
642  }
643  for (j = 1; j <= 7; ++j) {
644  absc = hlgth * xgk[j - 1];
645  absc1 = centr - absc;
646  absc2 = centr + absc;
647  tabsc1 = *boun + dinf * (1. - absc1) / absc1;
648  tabsc2 = *boun + dinf * (1. - absc2) / absc2;
649  vec[(j << 1) - 1] = tabsc1;
650  vec[j * 2] = tabsc2;
651  if (*inf == 2) {
652  vec2[(j << 1) - 1] = -tabsc1;
653  vec2[j * 2] = -tabsc2;
654  }
655  }
// After these calls vec / vec2 are read as integrand values.
656  f(vec, 15, ex);
657  if (*inf == 2) f(vec2, 15, ex);
658  fval1 = vec[0];
659  if (*inf == 2) fval1 += vec2[0];
// Each value is divided by the squared untransformed abscissa.
660  fc = fval1 / centr / centr;
661 
662  resg = wg[7] * fc;
663  resk = wgk[7] * fc;
664  *resabs = fabs(resk);
665  for (j = 1; j <= 7; ++j) {
666  absc = hlgth * xgk[j - 1];
667  absc1 = centr - absc;
668  absc2 = centr + absc;
// tabsc1 / tabsc2 are recomputed here but not read again in this loop.
669  tabsc1 = *boun + dinf * (1. - absc1) / absc1;
670  tabsc2 = *boun + dinf * (1. - absc2) / absc2;
671  fval1 = vec[(j << 1) - 1];
672  fval2 = vec[j * 2];
673  if (*inf == 2) {
674  fval1 += vec2[(j << 1) - 1];
675  }
676  if (*inf == 2) {
677  fval2 += vec2[j * 2];
678  }
679  fval1 = fval1 / absc1 / absc1;
680  fval2 = fval2 / absc2 / absc2;
681  fv1[j - 1] = fval1;
682  fv2[j - 1] = fval2;
683  fsum = fval1 + fval2;
684  resg += wg[j - 1] * fsum;
685  resk += wgk[j - 1] * fsum;
686  *resabs += wgk[j - 1] * (fabs(fval1) + fabs(fval2));
687  }
688  reskh = resk * .5;
689  *resasc = wgk[7] * fabs(fc - reskh);
690  for (j = 1; j <= 7; ++j) {
691  *resasc +=
692  wgk[j - 1] * (fabs(fv1[j - 1] - reskh) + fabs(fv2[j - 1] - reskh));
693  }
694  *result = resk * hlgth;
695  *resasc *= hlgth;
696  *resabs *= hlgth;
697  *abserr = fabs((resk - resg) * hlgth);
// Rescale the raw error estimate relative to resasc, capped at resasc itself.
698  if (*resasc != 0. && *abserr != 0.) {
699  *abserr = *resasc * fmin2(1., pow(*abserr * 200. / *resasc, 1.5));
700  }
// Never report an error below 50 * epmach * resabs when resabs is not tiny.
701  if (*resabs > uflow / (epmach * 50.)) {
702  *abserr = fmax2(epmach * 50. * *resabs, *abserr);
703  }
704  return;
705 }
706 
// rdqelg: extrapolate the sequence stored in epstab and return the best
// candidate together with an error estimate.
// NOTE(review): looks like a port of QUADPACK's dqelg (epsilon algorithm) -
// confirm upstream.
//
//   n       (in/out) number of entries in epstab; may be reduced on exit
//   epstab  (in/out) table of values; entry *n + 2 is written, so the array
//           must have room for at least *n + 2 elements
//   result  (out) selected extrapolated value
//   abserr  (out) error estimate, never below 5 * epmach * |*result|
//   res3la  (in/out) the three most recent results
//   nres    (in/out) call counter, incremented on every call
707 template <class Float>
708 static void rdqelg(int *n, Float *epstab, Float *result, Float *abserr,
709  Float *res3la, int *nres) {
710  int i__, indx, ib, ib2, ie, k1, k2, k3, num, newelm, limexp;
711  Float delta1, delta2, delta3, e0, e1, e1abs, e2, e3, epmach, epsinf;
712  Float oflow, ss, res;
713  Float errA, err1, err2, err3, tol1, tol2, tol3;
// Shift both pointers so the arrays can be indexed from 1 below.
714  --res3la;
715  --epstab;
716 
717  epmach = DBL_EPSILON;
718  oflow = DBL_MAX;
719  ++(*nres);
720  *abserr = oflow;
721  *result = epstab[*n];
// With fewer than three entries nothing is extrapolated: the last entry is
// returned as is.
722  if (*n < 3) {
723  goto L100;
724  }
725  limexp = 50;
726  epstab[*n + 2] = epstab[*n];
727  newelm = (*n - 1) / 2;
728  epstab[*n] = oflow;
729  num = *n;
730  k1 = *n;
731  for (i__ = 1; i__ <= newelm; ++i__) {
732  k2 = k1 - 1;
733  k3 = k1 - 2;
734  res = epstab[k1 + 2];
735  e0 = epstab[k3];
736  e1 = epstab[k2];
737  e2 = res;
738  e1abs = fabs(e1);
739  delta2 = e2 - e1;
740  err2 = fabs(delta2);
741  tol2 = fmax2(fabs(e2), e1abs) * epmach;
742  delta3 = e1 - e0;
743  err3 = fabs(delta3);
744  tol3 = fmax2(e1abs, fabs(e0)) * epmach;
// e0, e1 and e2 agree to within machine precision: accept res immediately.
745  if (err2 <= tol2 && err3 <= tol3) {
746  *result = res;
747  *abserr = err2 + err3;
748 
749  goto L100;
750  }
751 
752  e3 = epstab[k1];
753  epstab[k1] = e1;
754  delta1 = e1 - e3;
755  err1 = fabs(delta1);
756  tol1 = fmax2(e1abs, fabs(e3)) * epmach;
757 
// A new table element is only computed when all three differences are
// significant and |ss * e1| exceeds 1e-4.
758  if (err1 > tol1 && err2 > tol2 && err3 > tol3) {
759  ss = 1. / delta1 + 1. / delta2 - 1. / delta3;
760  epsinf = fabs(ss * e1);
761 
762  if (epsinf > 1e-4) {
763  goto L30;
764  }
765  }
766 
// Otherwise cut the table back to 2 * i__ - 1 entries and leave the sweep.
767  *n = i__ + i__ - 1;
768  goto L50;
769 
770  L30:
771 
772  res = e1 + 1. / ss;
773  epstab[k1] = res;
774  k1 += -2;
775  errA = err2 + fabs(res - e2) + err3;
// Keep the candidate with the smallest error estimate seen so far.
776  if (errA <= *abserr) {
777  *abserr = errA;
778  *result = res;
779  }
780  }
781 
782 L50:
// Cap the table length: limexp (50) becomes 49.
783  if (*n == limexp) {
784  *n = (limexp / 2 << 1) - 1;
785  }
786 
// Start at 2 when num is even and at 1 when it is odd, then shift every
// second entry down by two positions.
787  if (num / 2 << 1 == num)
788  ib = 2;
789  else
790  ib = 1;
791  ie = newelm + 1;
792  for (i__ = 1; i__ <= ie; ++i__) {
793  ib2 = ib + 2;
794  epstab[ib] = epstab[ib2];
795  ib = ib2;
796  }
// If the table was shortened, move the trailing *n entries to the front.
797  if (num != *n) {
798  indx = num - *n + 1;
799  for (i__ = 1; i__ <= *n; ++i__) {
800  epstab[i__] = epstab[indx];
801  ++indx;
802  }
803  }
804 
// From the fourth call on, the error is the summed distance to the three
// previous results; before that the slots of res3la are only being filled.
805  if (*nres >= 4) {
806  *abserr = fabs(*result - res3la[3]) + fabs(*result - res3la[2]) +
807  fabs(*result - res3la[1]);
808  res3la[1] = res3la[2];
809  res3la[2] = res3la[3];
810  res3la[3] = *result;
811  } else {
812  res3la[*nres] = *result;
813  *abserr = oflow;
814  }
815 
816 L100:
817  *abserr = fmax2(*abserr, epmach * 5. * fabs(*result));
818  return;
819 }
820 
// rdqk21: apply one 21-node quadrature rule to f on the interval [*a, *b].
// NOTE(review): looks like a port of QUADPACK's dqk21 - confirm upstream.
//
//   f       callback invoked as f(x, n, ex); the values it leaves in x[] are
//           read back below as integrand values
//   ex      opaque pointer forwarded to f
//   a, b    integration limits
//   result  (out) resk * hlgth
//   abserr  (out) error estimate based on |resk - resg| * hlgth
//   resabs  (out) weighted sum of absolute integrand values, times |hlgth|
//   resasc  (out) weighted sum of |value - resk / 2|, times |hlgth|
821 template <class Float, class integr_fn>
822 static void rdqk21(integr_fn f, void *ex, Float *a, Float *b, Float *result,
823  Float *abserr, Float *resabs, Float *resasc) {
// Tabulated weights (wg for resg, wgk for resk) and abscissae (xgk).
824  static double wg[5] = {
825  .066671344308688137593568809893332, .149451349150580593145776339657697,
826  .219086362515982043995534934228163, .269266719309996355091226921569469,
827  .295524224714752870173892994651338};
828  static double xgk[11] = {.995657163025808080735527280689003,
829  .973906528517171720077964012084452,
830  .930157491355708226001207180059508,
831  .865063366688984510732096688423493,
832  .780817726586416897063717578345042,
833  .679409568299024406234327365114874,
834  .562757134668604683339000099272694,
835  .433395394129247190799265943165784,
836  .294392862701460198131126603103866,
837  .14887433898163121088482600112972,
838  0.};
839  static double wgk[11] = {
840  .011694638867371874278064396062192, .03255816230796472747881897245939,
841  .05475589657435199603138130024458, .07503967481091995276704314091619,
842  .093125454583697605535065465083366, .109387158802297641899210590325805,
843  .123491976262065851077958109831074, .134709217311473325928054001771707,
844  .142775938577060080797094273138717, .147739104901338491374841515972068,
845  .149445554002916905664936468389821};
846 
847  Float fv1[10], fv2[10], vec[21];
848  Float absc, resg, resk, fsum, fval1, fval2;
849  Float hlgth, centr, reskh, uflow;
850  Float fc, epmach, dhlgth;
851  int j, jtw, jtwm1;
852  epmach = DBL_EPSILON;
853  uflow = DBL_MIN;
854 
855  centr = (*a + *b) * .5;
856  hlgth = (*b - *a) * .5;
857  dhlgth = fabs(hlgth);
858 
859  resg = 0.;
// Layout of vec: [0] centre, [1..10] node pairs taken from xgk[1], xgk[3], ...
// (these also feed resg), [11..20] node pairs taken from xgk[0], xgk[2], ...
860  vec[0] = centr;
861  for (j = 1; j <= 5; ++j) {
862  jtw = j << 1;
863  absc = hlgth * xgk[jtw - 1];
864  vec[(j << 1) - 1] = centr - absc;
865 
866  vec[j * 2] = centr + absc;
867  }
868  for (j = 1; j <= 5; ++j) {
869  jtwm1 = (j << 1) - 1;
870  absc = hlgth * xgk[jtwm1 - 1];
871  vec[(j << 1) + 9] = centr - absc;
872  vec[(j << 1) + 10] = centr + absc;
873  }
// Single call for all 21 nodes; afterwards vec is read as integrand values.
874  f(vec, 21, ex);
875  fc = vec[0];
876  resk = wgk[10] * fc;
877  *resabs = fabs(resk);
878  for (j = 1; j <= 5; ++j) {
879  jtw = j << 1;
880  absc = hlgth * xgk[jtw - 1];
881  fval1 = vec[(j << 1) - 1];
882  fval2 = vec[j * 2];
883  fv1[jtw - 1] = fval1;
884  fv2[jtw - 1] = fval2;
885  fsum = fval1 + fval2;
886  resg += wg[j - 1] * fsum;
887  resk += wgk[jtw - 1] * fsum;
888  *resabs += wgk[jtw - 1] * (fabs(fval1) + fabs(fval2));
889  }
// The remaining node pairs contribute to resk only.
890  for (j = 1; j <= 5; ++j) {
891  jtwm1 = (j << 1) - 1;
892  absc = hlgth * xgk[jtwm1 - 1];
893  fval1 = vec[(j << 1) + 9];
894  fval2 = vec[(j << 1) + 10];
895  fv1[jtwm1 - 1] = fval1;
896  fv2[jtwm1 - 1] = fval2;
897  fsum = fval1 + fval2;
898  resk += wgk[jtwm1 - 1] * fsum;
899  *resabs += wgk[jtwm1 - 1] * (fabs(fval1) + fabs(fval2));
900  }
901  reskh = resk * .5;
902  *resasc = wgk[10] * fabs(fc - reskh);
903  for (j = 1; j <= 10; ++j) {
904  *resasc +=
905  wgk[j - 1] * (fabs(fv1[j - 1] - reskh) + fabs(fv2[j - 1] - reskh));
906  }
907  *result = resk * hlgth;
908  *resabs *= dhlgth;
909  *resasc *= dhlgth;
910  *abserr = fabs((resk - resg) * hlgth);
// Rescale the raw error estimate relative to resasc, capped at resasc itself.
911  if (*resasc != 0. && *abserr != 0.) {
912  *abserr = *resasc * fmin2(1., pow(*abserr * 200. / *resasc, 1.5));
913  }
// Never report an error below 50 * epmach * resabs when resabs is not tiny.
914  if (*resabs > uflow / (epmach * 50.)) {
915  *abserr = fmax2(epmach * 50. * *resabs, *abserr);
916  }
917  return;
918 }
919 
// rdqpsrt: update the index list iord after an interval has been split, and
// select the interval to process next.
// NOTE(review): appears to keep elist[iord[.]] in descending order (port of
// QUADPACK's dqpsrt) - confirm upstream.
//
//   limit   maximum number of intervals
//   last    number of intervals currently in the list
//   maxerr  (in/out) on entry the index of the interval that was just split;
//           on exit iord[*nrmax]
//   ermax   (out) elist[*maxerr] for the returned *maxerr
//   elist   error estimates, indexed from 1 after the pointer shift below
//   iord    (in/out) index list being maintained
//   nrmax   (in/out) position in iord of the returned interval
920 template <class Float>
921 static void rdqpsrt(int *limit, int *last, int *maxerr, Float *ermax,
922  Float *elist, int *iord, int *nrmax) {
923  int i, j, k, ido, jbnd, isucc, jupbn;
924  Float errmin, errmax;
// Shift both pointers so the arrays can be indexed from 1 below.
925  --iord;
926  --elist;
927 
// With at most two intervals the list is simply 1, 2.
928  if (*last <= 2) {
929  iord[1] = 1;
930  iord[2] = 2;
931  goto Last;
932  }
933 
934  errmax = elist[*maxerr];
// Move *nrmax towards the front while the entry before it has a smaller
// error than errmax, shifting those entries one slot back.
935  if (*nrmax > 1) {
936  ido = *nrmax - 1;
937  for (i = 1; i <= ido; ++i) {
938  isucc = iord[*nrmax - 1];
939  if (errmax <= elist[isucc]) break;
940  iord[*nrmax] = isucc;
941  --(*nrmax);
942  }
943  }
944 
// Only the first jupbn entries are maintained once more than limit / 2 + 2
// intervals are in use.
945  if (*last > *limit / 2 + 2)
946  jupbn = *limit + 3 - *last;
947  else
948  jupbn = *last;
949 
950  errmin = elist[*last];
951 
952  jbnd = jupbn - 1;
// Scan forward for the slot of errmax; once found, scan backward from jbnd
// for the slot of errmin (the error of interval *last).
953  for (i = *nrmax + 1; i <= jbnd; ++i) {
954  isucc = iord[i];
955  if (errmax >= elist[isucc]) {
956  iord[i - 1] = *maxerr;
957  for (j = i, k = jbnd; j <= jbnd; j++, k--) {
958  isucc = iord[k];
959  if (errmin < elist[isucc]) {
960  iord[k + 1] = *last;
961  goto Last;
962  }
963  iord[k + 1] = isucc;
964  }
965  iord[i] = *last;
966  goto Last;
967  }
968  iord[i - 1] = isucc;
969  }
970 
// errmax is smaller than every scanned entry: both indices go to the end.
971  iord[jbnd] = *maxerr;
972  iord[jupbn] = *last;
973 
974 Last:
975 
976  *maxerr = iord[*nrmax];
977  *ermax = elist[*maxerr];
978  return;
979 }
980 
// Tuning parameters accepted by Integral, integrate and mvIntegrate.
988 struct control {
// Passed to Integral::setWorkspace as the maximum number of sub-intervals.
989  int subdivisions;
// Relative tolerance, passed to Integral::setAccuracy as epsrel_.
990  double reltol;
// Absolute tolerance, passed to Integral::setAccuracy as epsabs_.
991  double abstol;
// Declared here only; defaults are 100 subdivisions and 1e-4 for both
// tolerances.
992  control(int subdivisions_ = 100, double reltol_ = 1e-4,
993  double abstol_ = 1e-4);
994 };
995 
1010 template <class Integrand>
1011 struct Integral {
1012  typedef typename Integrand::Scalar Type;
1013 
1014  struct vectorized_integrand {
1015  Integrand f;
1016  vectorized_integrand(Integrand f_) : f(f_) {}
1017  void operator()(Type *x, int n, void *ex) {
1018  for (int i = 0; i < n; i++) x[i] = f(x[i]);
1019  }
1020  } fn;
1022  Integrand &integrand() { return fn.f; }
1023 
1024  Type epsabs, epsrel, result, abserr;
1025  int neval, ier, limit, lenw, last;
1026  std::vector<int> iwork;
1027  std::vector<Type> work;
1028  void setAccuracy(double epsrel_ = 1e-4, double epsabs_ = 1e-4) {
1029  epsabs = epsabs_;
1030  epsrel = epsrel_;
1031  result = 0;
1032  abserr = 1e4;
1033  neval = 0;
1034  ier = 0;
1035  last = 0;
1036  }
1037  void setWorkspace(int subdivisions = 100) {
1038  limit = subdivisions;
1039  lenw = 4 * limit;
1040  iwork.resize(limit);
1041  work.resize(lenw);
1042  }
1043  Type a, b, bound;
1044  int inf;
1045  void setBounds(Type a_, Type b_) {
1046  int a_finite = (a_ != -INFINITY) && (a_ != INFINITY);
1047  int b_finite = (b_ != -INFINITY) && (b_ != INFINITY);
1048  if (a_finite && b_finite) {
1049  inf = 0;
1050  a = a_;
1051  b = b_;
1052  } else if (a_finite && !b_finite) {
1053  inf = 1;
1054  bound = a_;
1055  } else if (!a_finite && b_finite) {
1056  inf = -1;
1057  bound = b_;
1058  } else {
1059  inf = 2;
1060  }
1061  }
1068  Integral(Integrand f_, Type a_, Type b_, control c = control()) : fn(f_) {
1069  setAccuracy(c.reltol, c.abstol);
1070  setWorkspace(c.subdivisions);
1071  setBounds(a_, b_);
1072  }
1073  Type operator()() {
1074  if (inf)
1075  Rdqagi(fn, NULL, &bound, &inf, &epsabs, &epsrel, &result, &abserr, &neval,
1076  &ier, &limit, &lenw, &last, &iwork[0], &work[0]);
1077  else
1078  Rdqags(fn, NULL, &a, &b, &epsabs, &epsrel, &result, &abserr, &neval, &ier,
1079  &limit, &lenw, &last, &iwork[0], &work[0]);
1080  return result;
1081  }
1082 };
1083 
1110 template <class Integrand>
1111 typename Integrand::Scalar integrate(Integrand f,
1112  typename Integrand::Scalar a = -INFINITY,
1113  typename Integrand::Scalar b = INFINITY,
1114  control c = control()) {
1115  Integral<Integrand> I(f, a, b, c);
1116  return I();
1117 }
1118 
1133 template <class Integrand>
1134 struct mvIntegral {
1135  typedef typename Integrand::Scalar Scalar;
1136  struct evaluator {
1137  typedef typename Integrand::Scalar Scalar;
1138  Integrand &f;
1139  Scalar &x;
1140  evaluator(Integrand &f_, Scalar &x_) : f(f_), x(x_) {}
1141  Scalar operator()(const Scalar &x_) {
1142  x = x_;
1143  return f();
1144  }
1145  } ev;
1146  control c;
1148  mvIntegral(Integrand &f_, Scalar &x_, Scalar a = -INFINITY,
1149  Scalar b = INFINITY, control c_ = control())
1150  : ev(f_, x_), c(c_), I(ev, a, b, c_) {}
1151  Scalar operator()() { return I(); }
1153  mvIntegral<mvIntegral> wrt(Scalar &x, Scalar a = -INFINITY,
1154  Scalar b = INFINITY) {
1155  return mvIntegral<mvIntegral>(*this, x, a, b, c);
1156  }
1157 };
1158 
1159 template <class Integrand>
1160 struct mvIntegral0 {
1161  typedef typename Integrand::Scalar Scalar;
1162  Integrand &f;
1163  control c;
1164  mvIntegral0(Integrand &f_, control c_) : f(f_), c(c_) {}
1166  mvIntegral<Integrand> wrt(Scalar &x, Scalar a = -INFINITY,
1167  Scalar b = INFINITY) {
1168  return mvIntegral<Integrand>(f, x, a, b, c);
1169  }
1170 };
1198 template <class Integrand>
1199 mvIntegral0<Integrand> mvIntegrate(Integrand &f, control c = control()) {
1200  return mvIntegral0<Integrand>(f, c);
1201 }
1202 
1203 } // namespace TMBad
1204 #endif // HAVE_INTEGRATE_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+
1 #ifndef HAVE_INTEGRATE_HPP
2 #define HAVE_INTEGRATE_HPP
3 // Autogenerated - do not edit by hand !
4 #include <float.h> // INFINITY etc
5 #include "global.hpp"
6 
7 namespace TMBad {
8 
14 template <class T>
15 double value(T x) {
16  return TMBad::Value(x);
17 }
18 double value(double x);
/** Smaller of x and y, converted to int; y is returned when x is not
    strictly less than y. */
template <class S, class T>
int imin2(S x, T y) {
  const bool first_is_smaller = x < y;
  return first_is_smaller ? x : y;
}
/** Smaller of value(x) and value(y) as a plain double; value(y) is returned
    when value(x) is not strictly less. */
template <class S, class T>
double fmin2(S x, T y) {
  const double lhs = value(x);
  const double rhs = value(y);
  if (lhs < rhs) return lhs;
  return rhs;
}
/** Larger of value(x) and value(y) as a plain double; value(x) is returned
    when value(x) is not strictly less than value(y). */
template <class S, class T>
double fmax2(S x, T y) {
  const double lhs = value(x);
  const double rhs = value(y);
  if (lhs < rhs) return rhs;
  return lhs;
}
// Forward declarations of the internal quadrature helpers, so that the driver
// routines defined below can call them before their definitions appear.

// Adaptive driver for (semi-)infinite ranges; called by Rdqagi.
31 template <class Float, class integr_fn>
32 static void rdqagie(integr_fn f, void *ex, Float *, int *, Float *, Float *,
33  int *, Float *, Float *, int *, int *, Float *, Float *,
34  Float *, Float *, int *, int *);
35 
// 15-node rule on the transformed variable; called by rdqagie.
36 template <class Float, class integr_fn>
37 static void rdqk15i(integr_fn f, void *ex, Float *, int *, Float *, Float *,
38  Float *, Float *, Float *, Float *);
39 
// Adaptive driver for finite ranges; called by Rdqags.
40 template <class Float, class integr_fn>
41 static void rdqagse(integr_fn f, void *ex, Float *, Float *, Float *, Float *,
42  int *, Float *, Float *, int *, int *, Float *, Float *,
43  Float *, Float *, int *, int *);
44 
// 21-node rule; called by rdqagse.
45 template <class Float, class integr_fn>
46 static void rdqk21(integr_fn f, void *ex, Float *, Float *, Float *, Float *,
47  Float *, Float *);
48 
// Maintains the interval ordering; called by both adaptive drivers.
49 template <class Float>
50 static void rdqpsrt(int *, int *, int *, Float *, Float *, int *, int *);
51 
// Extrapolation step; called by both adaptive drivers.
52 template <class Float>
53 static void rdqelg(int *, Float *, Float *, Float *, Float *, int *);
54 
54 
/**
 * Entry point for integration over a (semi-)infinite range.
 *
 * Validates the workspace and forwards to rdqagie, handing it four
 * consecutive slices of `work`, each `*limit` elements long.
 *
 * If `*limit < 1` or `*lenw < 4 * *limit` the routine returns immediately
 * with `*ier == 6` and `*neval`, `*last`, `*result`, `*abserr` set to zero.
 */
template <class Float, class integr_fn>
void Rdqagi(integr_fn f, void *ex, Float *bound, int *inf, Float *epsabs,
            Float *epsrel, Float *result, Float *abserr, int *neval, int *ier,
            int *limit, int *lenw, int *last, int *iwork, Float *work) {
  // Outputs for the "invalid workspace" case; rdqagie overwrites them.
  *result = 0.;
  *abserr = 0.;
  *neval = 0;
  *last = 0;
  *ier = 6;

  const int n = *limit;
  if (n < 1) return;
  if (*lenw < 4 * n) return;

  // Carve the scalar workspace into four arrays of n elements each.
  Float *const first_slice = work;
  Float *const second_slice = work + n;
  Float *const third_slice = work + 2 * n;
  Float *const fourth_slice = work + 3 * n;

  rdqagie(f, ex, bound, inf, epsabs, epsrel, limit, result, abserr, neval, ier,
          first_slice, second_slice, third_slice, fourth_slice, iwork, last);
}
76 
// rdqagie: adaptive quadrature driver for (semi-)infinite ranges. The work is
// done on the unit interval (alist[1] = 0, blist[1] = 1); the mapping to the
// original range happens inside rdqk15i.
// NOTE(review): looks like a port of QUADPACK's dqagie - confirm upstream.
//
//   f, ex           integrand callback and opaque pointer, passed to rdqk15i
//   bound, inf      finite end point and range selector; for *inf == 2 the
//                   bound is replaced by 0
//   epsabs, epsrel  requested absolute / relative accuracy
//   limit           maximum number of sub-intervals
//   result, abserr  (out) integral estimate and error estimate
//   neval           (out) 30 * last - 15, doubled when *inf == 2
//   ier             (out) status code, 0 when nothing was flagged; internal
//                   codes above 2 are decremented by one before returning
//   alist, blist    work arrays: end points of the sub-intervals
//   rlist, elist    work arrays: integral and error estimate per sub-interval
//   iord            work array of interval indices, maintained by rdqpsrt
//   last            (out) number of sub-intervals produced
77 template <class Float, class integr_fn>
78 static void rdqagie(integr_fn f, void *ex, Float *bound, int *inf,
79  Float *epsabs, Float *epsrel, int *limit, Float *result,
80  Float *abserr, int *neval, int *ier, Float *alist,
81  Float *blist, Float *rlist, Float *elist, int *iord,
82  int *last) {
83  Float area, dres;
84  int ksgn;
85  Float boun;
86  int nres;
87  Float area1, area2, area12;
88  int k;
89  Float small = 0.0, erro12;
90  int ierro;
91  Float a1, a2, b1, b2, defab1, defab2, oflow;
92  int ktmin, nrmax;
93  Float uflow;
94  bool noext;
95  int iroff1, iroff2, iroff3;
96  Float res3la[3], error1, error2;
97  int id;
98  Float rlist2[52];
99  int numrl2;
100  Float defabs, epmach, erlarg = 0.0, abseps, correc = 0.0, errbnd, resabs;
101  int jupbnd;
102  Float erlast, errmax;
103  int maxerr;
104  Float reseps;
105  bool extrap;
106  Float ertest = 0.0, errsum;
// Shift the array pointers so the arrays can be indexed from 1 below.
107  --iord;
108  --elist;
109  --rlist;
110  --blist;
111  --alist;
112 
113  epmach = DBL_EPSILON;
114 
115  *ier = 0;
116  *neval = 0;
117  *last = 0;
118  *result = 0.;
119  *abserr = 0.;
120  alist[1] = 0.;
121  blist[1] = 1.;
122  rlist[1] = 0.;
123  elist[1] = 0.;
124  iord[1] = 0;
// Reject a non-positive epsabs combined with an epsrel below
// max(50 * epmach, 5e-29): return at once with *ier == 6.
125  if (*epsabs <= 0. && (*epsrel < fmax2(epmach * 50., 5e-29))) *ier = 6;
126  if (*ier == 6) return;
127  boun = *bound;
128  if (*inf == 2) {
129  boun = 0.;
130  }
131 
// Limits of the whole transformed interval, passed by address to rdqk15i.
132  static Float c_b6 = 0.;
133  static Float c_b7 = 1.;
134 
// First approximation over the whole transformed interval.
135  rdqk15i(f, ex, &boun, inf, &c_b6, &c_b7, result, abserr, &defabs, &resabs);
136 
137  *last = 1;
138  rlist[1] = *result;
139  elist[1] = *abserr;
140  iord[1] = 1;
141  dres = fabs(*result);
142  errbnd = fmax2(*epsabs, *epsrel * dres);
143  if (*abserr <= epmach * 100. * defabs && *abserr > errbnd) *ier = 2;
144  if (*limit == 1) *ier = 1;
// Done if a problem was flagged or the first estimate is already accepted.
145  if (*ier != 0 || (*abserr <= errbnd && *abserr != resabs) || *abserr == 0.)
146  goto L130;
147 
148  uflow = DBL_MIN;
149  oflow = DBL_MAX;
150  rlist2[0] = *result;
151  errmax = *abserr;
152  maxerr = 1;
153  area = *result;
154  errsum = *abserr;
// oflow marks "no extrapolated result yet"; tested again after the loop.
155  *abserr = oflow;
156  nrmax = 1;
157  nres = 0;
158  ktmin = 0;
159  numrl2 = 2;
160  extrap = false;
161  noext = false;
162  ierro = 0;
163  iroff1 = 0;
164  iroff2 = 0;
165  iroff3 = 0;
166  ksgn = -1;
167  if (dres >= (1. - epmach * 50.) * defabs) {
168  ksgn = 1;
169  }
170 
// Main loop: each pass bisects the interval with index maxerr.
171  for (*last = 2; *last <= *limit; ++(*last)) {
172  a1 = alist[maxerr];
173  b1 = (alist[maxerr] + blist[maxerr]) * .5;
174  a2 = b1;
175  b2 = blist[maxerr];
176  erlast = errmax;
177  rdqk15i(f, ex, &boun, inf, &a1, &b1, &area1, &error1, &resabs, &defab1);
178  rdqk15i(f, ex, &boun, inf, &a2, &b2, &area2, &error2, &resabs, &defab2);
179 
// Replace the parent's contribution to the running totals by its two halves.
180  area12 = area1 + area2;
181  erro12 = error1 + error2;
182  errsum = errsum + erro12 - errmax;
183  area = area + area12 - rlist[maxerr];
// Count splits that barely changed the integral without reducing the error
// (iroff1 / iroff2) and, after 10 intervals, splits that increased it (iroff3).
184  if (!(defab1 == error1 || defab2 == error2)) {
185  if (fabs(rlist[maxerr] - area12) <= fabs(area12) * 1e-5 &&
186  erro12 >= errmax * .99) {
187  if (extrap)
188  ++iroff2;
189  else
190  ++iroff1;
191  }
192  if (*last > 10 && erro12 > errmax) ++iroff3;
193  }
194 
195  rlist[maxerr] = area1;
196  rlist[*last] = area2;
197  errbnd = fmax2(*epsabs, *epsrel * fabs(area));
198 
199  if (iroff1 + iroff2 >= 10 || iroff3 >= 20) *ier = 2;
200  if (iroff2 >= 5) ierro = 3;
201 
202  if (*last == *limit) *ier = 1;
203 
// The two halves can no longer be told apart in floating point.
204  if (fmax2(fabs(a1), fabs(b2)) <=
205  (epmach * 100. + 1.) * (fabs(a2) + uflow * 1e3)) {
206  *ier = 4;
207  }
208 
// Keep the half with the larger error in slot maxerr; the other half goes
// into the new slot *last.
209  if (error2 <= error1) {
210  alist[*last] = a2;
211  blist[maxerr] = b1;
212  blist[*last] = b2;
213  elist[maxerr] = error1;
214  elist[*last] = error2;
215  } else {
216  alist[maxerr] = a2;
217  alist[*last] = a1;
218  blist[*last] = b1;
219  rlist[maxerr] = area2;
220  rlist[*last] = area1;
221  elist[maxerr] = error2;
222  elist[*last] = error1;
223  }
224 
// Update the ordering and pick the interval to split next.
225  rdqpsrt(limit, last, &maxerr, &errmax, &elist[1], &iord[1], &nrmax);
226  if (errsum <= errbnd) {
227  goto L115;
228  }
229  if (*ier != 0) break;
230  if (*last == 2) {
231  small = .375;
232  erlarg = errsum;
233  ertest = errbnd;
234  rlist2[1] = area;
235  continue;
236  }
237  if (noext) continue;
238 
// erlarg tracks the error carried by intervals longer than `small`.
239  erlarg -= erlast;
240  if (fabs(b1 - a1) > small) {
241  erlarg += erro12;
242  }
243  if (!extrap) {
244  if (fabs(blist[maxerr] - alist[maxerr]) > small) {
245  continue;
246  }
247  extrap = true;
248  nrmax = 2;
249  }
250 
// Before extrapolating, look for a remaining interval longer than `small`
// and go back to bisecting it (L90) if one is found.
251  if (ierro != 3 && erlarg > ertest) {
252  id = nrmax;
253  jupbnd = *last;
254  if (*last > *limit / 2 + 2) {
255  jupbnd = *limit + 3 - *last;
256  }
257  for (k = id; k <= jupbnd; ++k) {
258  maxerr = iord[nrmax];
259  errmax = elist[maxerr];
260  if (fabs(blist[maxerr] - alist[maxerr]) > small) {
261  goto L90;
262  }
263  ++nrmax;
264  }
265  }
266 
// Append the current area to the extrapolation table and extrapolate.
267  ++numrl2;
268  rlist2[numrl2 - 1] = area;
269  rdqelg(&numrl2, rlist2, &reseps, &abseps, res3la, &nres);
270  ++ktmin;
271  if (ktmin > 5 && *abserr < errsum * .001) {
272  *ier = 5;
273  }
274  if (abseps >= *abserr) {
275  goto L70;
276  }
// The extrapolated value improved on the best one so far: adopt it.
277  ktmin = 0;
278  *abserr = abseps;
279  *result = reseps;
280  correc = erlarg;
281  ertest = fmax2(*epsabs, *epsrel * fabs(reseps));
282  if (*abserr <= ertest) {
283  break;
284  }
285 
286  L70:
287  if (numrl2 == 1) {
288  noext = true;
289  }
290  if (*ier == 5) {
291  break;
292  }
// Restart from the interval with the largest error, with `small` halved.
293  maxerr = iord[1];
294  errmax = elist[maxerr];
295  nrmax = 1;
296  extrap = false;
297  small *= .5;
298  erlarg = errsum;
299  L90:;
300  }
301 
// Choose between the extrapolated result and the plain sum (L115).
302  if (*abserr == oflow) {
303  goto L115;
304  }
305  if (*ier + ierro == 0) {
306  goto L110;
307  }
308  if (ierro == 3) {
309  *abserr += correc;
310  }
311  if (*ier == 0) {
312  *ier = 3;
313  }
314  if (*result == 0. || area == 0.) {
315  if (*abserr > errsum) goto L115;
316 
317  if (area == 0.) goto L130;
318  } else {
319  if (*abserr / fabs(*result) > errsum / fabs(area)) {
320  goto L115;
321  }
322  }
323 
324 L110:
// Flag the result (internal code 6) when it is implausible relative to area.
325  if (ksgn == -1 && fmax2(fabs(*result), fabs(area)) <= defabs * .01) {
326  goto L130;
327  }
328  if (.01 > *result / area || *result / area > 100. || errsum > fabs(area)) {
329  *ier = 6;
330  }
331  goto L130;
332 
333 L115:
// Fall back to the plain sum of all interval integrals.
334  *result = 0.;
335  for (k = 1; k <= *last; ++k) *result += rlist[k];
336 
337  *abserr = errsum;
338 L130:
// Final bookkeeping: evaluation count and shift of the internal status codes.
339  *neval = *last * 30 - 15;
340  if (*inf == 2) {
341  *neval <<= 1;
342  }
343  if (*ier > 2) {
344  --(*ier);
345  }
346  return;
347 }
348 
/**
 * Entry point for integration over the finite range [*a, *b].
 *
 * Validates the workspace and forwards to rdqagse, handing it four
 * consecutive slices of `work`, each `*limit` elements long.
 *
 * If `*limit < 1` or `*lenw < 4 * *limit` the routine returns immediately
 * with `*ier == 6` and `*neval`, `*last`, `*result`, `*abserr` set to zero.
 */
template <class Float, class integr_fn>
void Rdqags(integr_fn f, void *ex, Float *a, Float *b, Float *epsabs,
            Float *epsrel, Float *result, Float *abserr, int *neval, int *ier,
            int *limit, int *lenw, int *last, int *iwork, Float *work) {
  // Outputs for the "invalid workspace" case; rdqagse overwrites them.
  *result = 0.;
  *abserr = 0.;
  *neval = 0;
  *last = 0;
  *ier = 6;

  const int n = *limit;
  if (n < 1) return;
  if (*lenw < n * 4) return;

  // Carve the scalar workspace into four arrays of n elements each.
  Float *const first_slice = work;
  Float *const second_slice = work + n;
  Float *const third_slice = work + 2 * n;
  Float *const fourth_slice = work + 3 * n;

  rdqagse(f, ex, a, b, epsabs, epsrel, limit, result, abserr, neval, ier,
          first_slice, second_slice, third_slice, fourth_slice, iwork, last);
}
370 
/**
 * Adaptive quadrature driver for the finite range [*a, *b], using rdqk21 on
 * each sub-interval, rdqpsrt to order the intervals and rdqelg to
 * extrapolate.
 * NOTE(review): looks like a port of QUADPACK's dqagse - confirm upstream.
 *
 * \param f, ex           integrand callback and opaque pointer (to rdqk21)
 * \param a, b            integration limits
 * \param epsabs, epsrel  requested absolute / relative accuracy
 * \param limit           maximum number of sub-intervals
 * \param result, abserr  (out) integral estimate and error estimate
 * \param neval           (out) 42 * last - 21 whenever the first rule was run
 * \param ier             (out) status code, 0 when nothing was flagged
 * \param alist, blist    work arrays: end points of the sub-intervals
 * \param rlist, elist    work arrays: integral / error per sub-interval
 * \param iord            work array of interval indices (see rdqpsrt)
 * \param last            (out) number of sub-intervals produced
 *
 * The epilogue mirrors rdqagie: internal status codes above 2 are
 * decremented by one before returning, divergence is flagged internally as
 * 6, and *neval is written on every path that reaches L130 or L140.
 * Previously the `if (*ier > 2)` at L130 had lost its statement and guarded
 * the *neval assignment instead, so *neval stayed 0 after a normal run and
 * the status codes were never shifted.
 */
template <class Float, class integr_fn>
static void rdqagse(integr_fn f, void *ex, Float *a, Float *b, Float *epsabs,
                    Float *epsrel, int *limit, Float *result, Float *abserr,
                    int *neval, int *ier, Float *alist, Float *blist,
                    Float *rlist, Float *elist, int *iord, int *last) {
  bool noext, extrap;
  int k, ksgn, nres;
  int ierro;
  int ktmin, nrmax;
  int iroff1, iroff2, iroff3;
  int id;
  int numrl2;
  int jupbnd;
  int maxerr;
  Float res3la[3];
  Float rlist2[52];
  Float abseps, area, area1, area2, area12, dres, epmach;
  Float a1, a2, b1, b2, defabs, defab1, defab2, oflow, uflow, resabs, reseps;
  Float error1, error2, erro12, errbnd, erlast, errmax, errsum;

  Float correc = 0.0, erlarg = 0.0, ertest = 0.0, small = 0.0;
  // Shift the array pointers so the arrays can be indexed from 1 below.
  --iord;
  --elist;
  --rlist;
  --blist;
  --alist;

  epmach = DBL_EPSILON;

  *ier = 0;
  *neval = 0;
  *last = 0;
  *result = 0.;
  *abserr = 0.;
  alist[1] = *a;
  blist[1] = *b;
  rlist[1] = 0.;
  elist[1] = 0.;
  // Reject a non-positive epsabs combined with an epsrel below
  // max(50 * epmach, 5e-29).
  if (*epsabs <= 0. && *epsrel < fmax2(epmach * 50., 5e-29)) {
    *ier = 6;
    return;
  }

  uflow = DBL_MIN;
  oflow = DBL_MAX;
  ierro = 0;
  // First approximation over the whole interval.
  rdqk21(f, ex, a, b, result, abserr, &defabs, &resabs);

  dres = fabs(*result);
  errbnd = fmax2(*epsabs, *epsrel * dres);
  *last = 1;
  rlist[1] = *result;
  elist[1] = *abserr;
  iord[1] = 1;
  if (*abserr <= epmach * 100. * defabs && *abserr > errbnd) *ier = 2;
  if (*limit == 1) *ier = 1;
  // Done if a problem was flagged or the first estimate is already accepted.
  // *ier is at most 2 here, so the status shift at L130 can be skipped.
  if (*ier != 0 || (*abserr <= errbnd && *abserr != resabs) || *abserr == 0.)
    goto L140;

  rlist2[0] = *result;
  errmax = *abserr;
  maxerr = 1;
  area = *result;
  errsum = *abserr;
  // oflow marks "no extrapolated result yet"; tested again after the loop.
  *abserr = oflow;
  nrmax = 1;
  nres = 0;
  numrl2 = 2;
  ktmin = 0;
  extrap = false;
  noext = false;
  iroff1 = 0;
  iroff2 = 0;
  iroff3 = 0;
  ksgn = -1;
  if (dres >= (1. - epmach * 50.) * defabs) {
    ksgn = 1;
  }

  // Main loop: each pass bisects the interval with index maxerr.
  for (*last = 2; *last <= *limit; ++(*last)) {
    a1 = alist[maxerr];
    b1 = (alist[maxerr] + blist[maxerr]) * .5;
    a2 = b1;
    b2 = blist[maxerr];
    erlast = errmax;
    rdqk21(f, ex, &a1, &b1, &area1, &error1, &resabs, &defab1);
    rdqk21(f, ex, &a2, &b2, &area2, &error2, &resabs, &defab2);

    // Replace the parent's contribution to the totals by its two halves.
    area12 = area1 + area2;
    erro12 = error1 + error2;
    errsum = errsum + erro12 - errmax;
    area = area + area12 - rlist[maxerr];
    if (!(defab1 == error1 || defab2 == error2)) {
      if (fabs(rlist[maxerr] - area12) <= fabs(area12) * 1e-5 &&
          erro12 >= errmax * .99) {
        if (extrap)
          ++iroff2;
        else
          ++iroff1;
      }
      if (*last > 10 && erro12 > errmax) ++iroff3;
    }
    rlist[maxerr] = area1;
    rlist[*last] = area2;
    errbnd = fmax2(*epsabs, *epsrel * fabs(area));

    if (iroff1 + iroff2 >= 10 || iroff3 >= 20) *ier = 2;
    if (iroff2 >= 5) ierro = 3;

    if (*last == *limit) *ier = 1;

    // The two halves can no longer be told apart in floating point.
    if (fmax2(fabs(a1), fabs(b2)) <=
        (epmach * 100. + 1.) * (fabs(a2) + uflow * 1e3)) {
      *ier = 4;
    }

    // Keep the half with the larger error in slot maxerr; the other half
    // goes into the new slot *last.
    if (error2 > error1) {
      alist[maxerr] = a2;
      alist[*last] = a1;
      blist[*last] = b1;
      rlist[maxerr] = area2;
      rlist[*last] = area1;
      elist[maxerr] = error2;
      elist[*last] = error1;
    } else {
      alist[*last] = a2;
      blist[maxerr] = b1;
      blist[*last] = b2;
      elist[maxerr] = error1;
      elist[*last] = error2;
    }

    // Update the ordering and pick the interval to split next.
    rdqpsrt(limit, last, &maxerr, &errmax, &elist[1], &iord[1], &nrmax);

    if (errsum <= errbnd) goto L115;
    if (*ier != 0) break;
    if (*last == 2) {
      small = fabs(*b - *a) * .375;
      erlarg = errsum;
      ertest = errbnd;
      rlist2[1] = area;
      continue;
    }
    if (noext) continue;

    // erlarg tracks the error carried by intervals longer than `small`.
    erlarg -= erlast;
    if (fabs(b1 - a1) > small) {
      erlarg += erro12;
    }
    if (!extrap) {
      if (fabs(blist[maxerr] - alist[maxerr]) > small) {
        continue;
      }
      extrap = true;
      nrmax = 2;
    }

    // Before extrapolating, look for a remaining interval longer than
    // `small` and go back to bisecting it (L90) if one is found.
    if (ierro != 3 && erlarg > ertest) {
      id = nrmax;
      jupbnd = *last;
      if (*last > *limit / 2 + 2) {
        jupbnd = *limit + 3 - *last;
      }
      for (k = id; k <= jupbnd; ++k) {
        maxerr = iord[nrmax];
        errmax = elist[maxerr];
        if (fabs(blist[maxerr] - alist[maxerr]) > small) {
          goto L90;
        }
        ++nrmax;
      }
    }

    // Append the current area to the extrapolation table and extrapolate.
    ++numrl2;
    rlist2[numrl2 - 1] = area;
    rdqelg(&numrl2, rlist2, &reseps, &abseps, res3la, &nres);
    ++ktmin;
    if (ktmin > 5 && *abserr < errsum * .001) {
      *ier = 5;
    }
    if (abseps < *abserr) {
      // The extrapolated value improved on the best one so far: adopt it.
      ktmin = 0;
      *abserr = abseps;
      *result = reseps;
      correc = erlarg;
      ertest = fmax2(*epsabs, *epsrel * fabs(reseps));
      if (*abserr <= ertest) {
        break;
      }
    }

    if (numrl2 == 1) {
      noext = true;
    }
    if (*ier == 5) {
      break;
    }
    // Restart from the interval with the largest error, with `small` halved.
    maxerr = iord[1];
    errmax = elist[maxerr];
    nrmax = 1;
    extrap = false;
    small *= .5;
    erlarg = errsum;
  L90:;
  }

  // Choose between the extrapolated result and the plain sum (L115).
  if (*abserr == oflow) goto L115;
  if (*ier + ierro == 0) goto L110;
  if (ierro == 3) *abserr += correc;
  if (*ier == 0) *ier = 3;
  if (*result == 0. || area == 0.) {
    if (*abserr > errsum) goto L115;
    if (area == 0.) goto L130;
  } else {
    if (*abserr / fabs(*result) > errsum / fabs(area)) goto L115;
  }

L110:
  if (ksgn == -1 && fmax2(fabs(*result), fabs(area)) <= defabs * .01) {
    goto L130;
  }
  // Internal code 6 (as in rdqagie); it is reported as 5 after the shift at
  // L130, which is the value this branch produced before the fix.
  if (.01 > *result / area || *result / area > 100. || errsum > fabs(area)) {
    *ier = 6;
  }
  goto L130;

L115:
  // Fall back to the plain sum of all interval integrals.
  *result = 0.;
  for (k = 1; k <= *last; ++k) *result += rlist[k];
  *abserr = errsum;
L130:
  // Shift the internal status codes, exactly as rdqagie does.
  if (*ier > 2) --(*ier);
L140:
  *neval = *last * 42 - 21;
  return;
}
607 
// rdqk15i: apply one 15-node quadrature rule on the sub-interval [*a, *b] of
// the transformed variable used for (semi-)infinite integration ranges.
// NOTE(review): looks like a port of QUADPACK's dqk15i - confirm upstream.
//
//   f       callback invoked as f(x, n, ex); the values it leaves in x[] are
//           read back below as integrand values
//   ex      opaque pointer forwarded to f
//   boun    finite end point used in the variable transformation
//   inf     range selector; when *inf == 2 the integrand is also evaluated at
//           the negated abscissae and both contributions are added
//   a, b    limits of the sub-interval in the transformed variable
//   result  (out) resk * hlgth
//   abserr  (out) error estimate based on |resk - resg| * hlgth
//   resabs  (out) weighted sum of absolute integrand values, times hlgth
//   resasc  (out) weighted sum of |value - resk / 2|, times hlgth
608 template <class Float, class integr_fn>
609 static void rdqk15i(integr_fn f, void *ex, Float *boun, int *inf, Float *a,
610  Float *b, Float *result, Float *abserr, Float *resabs,
611  Float *resasc) {
// Tabulated weights (wg, wgk) and abscissae (xgk). wg is zero in every other
// slot, so only half of the off-centre nodes contribute to resg.
612  static double wg[8] = {0., .129484966168869693270611432679082,
613  0., .27970539148927666790146777142378,
614  0., .381830050505118944950369775488975,
615  0., .417959183673469387755102040816327};
616  static double xgk[8] = {
617  .991455371120812639206854697526329, .949107912342758524526189684047851,
618  .864864423359769072789712788640926, .741531185599394439863864773280788,
619  .58608723546769113029414483825873, .405845151377397166906606412076961,
620  .207784955007898467600689403773245, 0.};
621  static double wgk[8] = {
622  .02293532201052922496373200805897, .063092092629978553290700663189204,
623  .104790010322250183839876322541518, .140653259715525918745189590510238,
624  .16900472663926790282658342659855, .190350578064785409913256402421014,
625  .204432940075298892414161999234649, .209482141084727828012999174891714};
626 
627  Float absc, dinf, resg, resk, fsum, absc1, absc2, fval1, fval2;
628  int j;
629  Float hlgth, centr, reskh, uflow;
630  Float tabsc1, tabsc2, fc, epmach;
631  Float fv1[7], fv2[7], vec[15], vec2[15];
632  epmach = DBL_EPSILON;
633  uflow = DBL_MIN;
// dinf is min(1, *inf): -1 for *inf == -1 and +1 for *inf == 1 or 2.
634  dinf = (double)imin2(1, *inf);
635 
636  centr = (*a + *b) * .5;
637  hlgth = (*b - *a) * .5;
// Map the centre and the 7 symmetric node pairs through
// t -> *boun + dinf * (1 - t) / t and collect them in vec (and, for
// *inf == 2, their negatives in vec2) so f is called once per array.
638  tabsc1 = *boun + dinf * (1. - centr) / centr;
639  vec[0] = tabsc1;
640  if (*inf == 2) {
641  vec2[0] = -tabsc1;
642  }
643  for (j = 1; j <= 7; ++j) {
644  absc = hlgth * xgk[j - 1];
645  absc1 = centr - absc;
646  absc2 = centr + absc;
647  tabsc1 = *boun + dinf * (1. - absc1) / absc1;
648  tabsc2 = *boun + dinf * (1. - absc2) / absc2;
649  vec[(j << 1) - 1] = tabsc1;
650  vec[j * 2] = tabsc2;
651  if (*inf == 2) {
652  vec2[(j << 1) - 1] = -tabsc1;
653  vec2[j * 2] = -tabsc2;
654  }
655  }
// After these calls vec / vec2 are read as integrand values.
656  f(vec, 15, ex);
657  if (*inf == 2) f(vec2, 15, ex);
658  fval1 = vec[0];
659  if (*inf == 2) fval1 += vec2[0];
// Each value is divided by the squared untransformed abscissa.
660  fc = fval1 / centr / centr;
661 
662  resg = wg[7] * fc;
663  resk = wgk[7] * fc;
664  *resabs = fabs(resk);
665  for (j = 1; j <= 7; ++j) {
666  absc = hlgth * xgk[j - 1];
667  absc1 = centr - absc;
668  absc2 = centr + absc;
// tabsc1 / tabsc2 are recomputed here but not read again in this loop.
669  tabsc1 = *boun + dinf * (1. - absc1) / absc1;
670  tabsc2 = *boun + dinf * (1. - absc2) / absc2;
671  fval1 = vec[(j << 1) - 1];
672  fval2 = vec[j * 2];
673  if (*inf == 2) {
674  fval1 += vec2[(j << 1) - 1];
675  }
676  if (*inf == 2) {
677  fval2 += vec2[j * 2];
678  }
679  fval1 = fval1 / absc1 / absc1;
680  fval2 = fval2 / absc2 / absc2;
681  fv1[j - 1] = fval1;
682  fv2[j - 1] = fval2;
683  fsum = fval1 + fval2;
684  resg += wg[j - 1] * fsum;
685  resk += wgk[j - 1] * fsum;
686  *resabs += wgk[j - 1] * (fabs(fval1) + fabs(fval2));
687  }
688  reskh = resk * .5;
689  *resasc = wgk[7] * fabs(fc - reskh);
690  for (j = 1; j <= 7; ++j) {
691  *resasc +=
692  wgk[j - 1] * (fabs(fv1[j - 1] - reskh) + fabs(fv2[j - 1] - reskh));
693  }
694  *result = resk * hlgth;
695  *resasc *= hlgth;
696  *resabs *= hlgth;
697  *abserr = fabs((resk - resg) * hlgth);
// Rescale the raw error estimate relative to resasc, capped at resasc itself.
698  if (*resasc != 0. && *abserr != 0.) {
699  *abserr = *resasc * fmin2(1., pow(*abserr * 200. / *resasc, 1.5));
700  }
// Never report an error below 50 * epmach * resabs when resabs is not tiny.
701  if (*resabs > uflow / (epmach * 50.)) {
702  *abserr = fmax2(epmach * 50. * *resabs, *abserr);
703  }
704  return;
705 }
706 
// Extrapolation helper for the adaptive integrators: processes the table
// `epstab` in place and reports the selected value with an error estimate.
// NOTE(review): name and structure suggest a port of QUADPACK's DQELG
// (epsilon algorithm) -- confirm against the upstream routine.
//
//   n       in/out  1-based index of the newest entry of epstab; may be
//                   lowered here when the table is shortened.
//   epstab  in/out  table of approximations; entry n + 2 is written, so the
//                   caller must provide at least two spare slots.
//   result  out     selected approximation.
//   abserr  out     error estimate for *result (never below
//                   5 * DBL_EPSILON * |*result|).
//   res3la  in/out  the three most recently returned results.
//   nres    in/out  call counter, incremented on every call.
707 template <class Float>
708 static void rdqelg(int *n, Float *epstab, Float *result, Float *abserr,
709  Float *res3la, int *nres) {
710  int i__, indx, ib, ib2, ie, k1, k2, k3, num, newelm, limexp;
711  Float delta1, delta2, delta3, e0, e1, e1abs, e2, e3, epmach, epsinf;
712  Float oflow, ss, res;
713  Float errA, err1, err2, err3, tol1, tol2, tol3;
// Shift both arrays so that the 1-based indices used below are valid.
714  --res3la;
715  --epstab;
716 
717  epmach = DBL_EPSILON;
718  oflow = DBL_MAX;
719  ++(*nres);
720  *abserr = oflow;
721  *result = epstab[*n];
// With fewer than three entries only the final floor on abserr is applied.
722  if (*n < 3) {
723  goto L100;
724  }
// Table length at which *n is cut back (see label L50).
725  limexp = 50;
726  epstab[*n + 2] = epstab[*n];
727  newelm = (*n - 1) / 2;
728  epstab[*n] = oflow;
729  num = *n;
730  k1 = *n;
731  for (i__ = 1; i__ <= newelm; ++i__) {
732  k2 = k1 - 1;
733  k3 = k1 - 2;
734  res = epstab[k1 + 2];
735  e0 = epstab[k3];
736  e1 = epstab[k2];
737  e2 = res;
738  e1abs = fabs(e1);
739  delta2 = e2 - e1;
740  err2 = fabs(delta2);
741  tol2 = fmax2(fabs(e2), e1abs) * epmach;
742  delta3 = e1 - e0;
743  err3 = fabs(delta3);
744  tol3 = fmax2(e1abs, fabs(e0)) * epmach;
// e0, e1 and e2 agree to within machine precision: accept e2 and stop.
745  if (err2 <= tol2 && err3 <= tol3) {
746  *result = res;
747  *abserr = err2 + err3;
748 
749  goto L100;
750  }
751 
752  e3 = epstab[k1];
753  epstab[k1] = e1;
754  delta1 = e1 - e3;
755  err1 = fabs(delta1);
756  tol1 = fmax2(e1abs, fabs(e3)) * epmach;
757 
// A new table element is computed (label L30) only when all three
// differences exceed their tolerances and epsinf > 1e-4.
758  if (err1 > tol1 && err2 > tol2 && err3 > tol3) {
759  ss = 1. / delta1 + 1. / delta2 - 1. / delta3;
760  epsinf = fabs(ss * e1);
761 
762  if (epsinf > 1e-4) {
763  goto L30;
764  }
765  }
766 
// Otherwise stop processing: shorten the table and leave the loop.
767  *n = i__ + i__ - 1;
768  goto L50;
769 
770  L30:
771 
772  res = e1 + 1. / ss;
773  epstab[k1] = res;
774  k1 += -2;
775  errA = err2 + fabs(res - e2) + err3;
// Keep the candidate with the smallest error estimate seen so far.
776  if (errA <= *abserr) {
777  *abserr = errA;
778  *result = res;
779  }
780  }
781 
782 L50:
// Cap the table length: limexp (50) is replaced by 49.
783  if (*n == limexp) {
784  *n = (limexp / 2 << 1) - 1;
785  }
786 
// '<<' binds tighter than '==', so this tests whether num is even.
787  if (num / 2 << 1 == num)
788  ib = 2;
789  else
790  ib = 1;
791  ie = newelm + 1;
// Shift every second entry down by two positions, starting at ib.
792  for (i__ = 1; i__ <= ie; ++i__) {
793  ib2 = ib + 2;
794  epstab[ib] = epstab[ib2];
795  ib = ib2;
796  }
// The table was shortened: move its last *n entries to the front.
797  if (num != *n) {
798  indx = num - *n + 1;
799  for (i__ = 1; i__ <= *n; ++i__) {
800  epstab[i__] = epstab[indx];
801  ++indx;
802  }
803  }
804 
// From the fourth call on, the error estimate is the total distance of the
// new result to the three previous ones; before that the history is filled.
805  if (*nres >= 4) {
806  *abserr = fabs(*result - res3la[3]) + fabs(*result - res3la[2]) +
807  fabs(*result - res3la[1]);
808  res3la[1] = res3la[2];
809  res3la[2] = res3la[3];
810  res3la[3] = *result;
811  } else {
812  res3la[*nres] = *result;
813  *abserr = oflow;
814  }
815 
816 L100:
// Never report an error estimate below 5 * epmach * |result|.
817  *abserr = fmax2(*abserr, epmach * 5. * fabs(*result));
818  return;
819 }
820 
// Applies a fixed 21-point quadrature rule to f on [*a, *b].
// NOTE(review): name and node/weight tables suggest QUADPACK's DQK21
// (21-point Gauss-Kronrod with an embedded 10-point Gauss rule) -- confirm.
//
//   f       vectorized integrand, called once as f(vec, 21, ex); it must
//           overwrite the 21 abscissae in vec with the function values.
//   ex      opaque pointer handed through to f.
//   a, b    integration limits.
//   result  out: estimate from the 21-point weights (wgk).
//   abserr  out: error estimate derived from the difference between the
//           wgk-based and wg-based sums.
//   resabs  out: the same rule applied to |f|.
//   resasc  out: the same rule applied to |f - result / (b - a)|.
template <class Float, class integr_fn>
static void rdqk21(integr_fn f, void *ex, Float *a, Float *b, Float *result,
                   Float *abserr, Float *resabs, Float *resasc) {
  // The tables are read-only, so they are declared const.
  static const double wg[5] = {
      .066671344308688137593568809893332, .149451349150580593145776339657697,
      .219086362515982043995534934228163, .269266719309996355091226921569469,
      .295524224714752870173892994651338};
  static const double xgk[11] = {.995657163025808080735527280689003,
                                 .973906528517171720077964012084452,
                                 .930157491355708226001207180059508,
                                 .865063366688984510732096688423493,
                                 .780817726586416897063717578345042,
                                 .679409568299024406234327365114874,
                                 .562757134668604683339000099272694,
                                 .433395394129247190799265943165784,
                                 .294392862701460198131126603103866,
                                 .14887433898163121088482600112972,
                                 0.};
  static const double wgk[11] = {
      .011694638867371874278064396062192, .03255816230796472747881897245939,
      .05475589657435199603138130024458,  .07503967481091995276704314091619,
      .093125454583697605535065465083366, .109387158802297641899210590325805,
      .123491976262065851077958109831074, .134709217311473325928054001771707,
      .142775938577060080797094273138717, .147739104901338491374841515972068,
      .149445554002916905664936468389821};

  Float fv1[10], fv2[10], vec[21];
  Float absc, resg, resk, fsum, fval1, fval2;
  Float hlgth, centr, reskh, uflow;
  Float fc, epmach, dhlgth;
  int j, jtw, jtwm1;
  epmach = DBL_EPSILON;
  uflow = DBL_MIN;

  centr = (*a + *b) * .5;
  hlgth = (*b - *a) * .5;
  dhlgth = fabs(hlgth);

  // Lay out the 21 abscissae: centre, then the even-indexed node pairs in
  // vec[1..10], then the odd-indexed node pairs in vec[11..20].
  resg = 0.;
  vec[0] = centr;
  for (j = 1; j <= 5; ++j) {
    jtw = j << 1;
    absc = hlgth * xgk[jtw - 1];
    vec[(j << 1) - 1] = centr - absc;
    vec[j * 2] = centr + absc;
  }
  for (j = 1; j <= 5; ++j) {
    jtwm1 = (j << 1) - 1;
    absc = hlgth * xgk[jtwm1 - 1];
    vec[(j << 1) + 9] = centr - absc;
    vec[(j << 1) + 10] = centr + absc;
  }

  // Single vectorized evaluation; vec now holds function values.
  f(vec, 21, ex);
  fc = vec[0];
  resk = wgk[10] * fc;
  *resabs = fabs(resk);

  // Node pairs that contribute to both the wg and the wgk sums.
  // (The original recomputed `absc` here without using it; for recording
  // Float types that produced needless operations, so it is dropped.)
  for (j = 1; j <= 5; ++j) {
    jtw = j << 1;
    fval1 = vec[(j << 1) - 1];
    fval2 = vec[j * 2];
    fv1[jtw - 1] = fval1;
    fv2[jtw - 1] = fval2;
    fsum = fval1 + fval2;
    resg += wg[j - 1] * fsum;
    resk += wgk[jtw - 1] * fsum;
    *resabs += wgk[jtw - 1] * (fabs(fval1) + fabs(fval2));
  }
  // Node pairs that contribute to the wgk sum only.
  for (j = 1; j <= 5; ++j) {
    jtwm1 = (j << 1) - 1;
    fval1 = vec[(j << 1) + 9];
    fval2 = vec[(j << 1) + 10];
    fv1[jtwm1 - 1] = fval1;
    fv2[jtwm1 - 1] = fval2;
    fsum = fval1 + fval2;
    resk += wgk[jtwm1 - 1] * fsum;
    *resabs += wgk[jtwm1 - 1] * (fabs(fval1) + fabs(fval2));
  }

  // Weighted absolute deviation of f from half the wgk sum.
  reskh = resk * .5;
  *resasc = wgk[10] * fabs(fc - reskh);
  for (j = 1; j <= 10; ++j) {
    *resasc +=
        wgk[j - 1] * (fabs(fv1[j - 1] - reskh) + fabs(fv2[j - 1] - reskh));
  }
  *result = resk * hlgth;
  *resabs *= dhlgth;
  *resasc *= dhlgth;
  *abserr = fabs((resk - resg) * hlgth);
  // Rescale the raw difference relative to resasc.
  if (*resasc != 0. && *abserr != 0.) {
    *abserr = *resasc * fmin2(1., pow(*abserr * 200. / *resasc, 1.5));
  }
  // Enforce a round-off floor when resabs is not near underflow.
  if (*resabs > uflow / (epmach * 50.)) {
    *abserr = fmax2(epmach * 50. * *resabs, *abserr);
  }
  return;
}
919 
// Updates the index ordering `iord` of the values in `elist` after a new
// entry has been appended, and reports the entry selected through *nrmax.
// NOTE(review): appears to be a port of QUADPACK's DQPSRT, which keeps the
// error estimates sorted in decreasing order -- confirm.
//
//   limit   in      maximum number of entries; bounds the scanned range.
//   last    in      current number of entries in elist.
//   maxerr  in/out  index into elist; set to iord[*nrmax] on exit.
//   ermax   out     elist[*maxerr] on exit.
//   elist   in      values being ordered.
//   iord    in/out  index ordering being maintained.
//   nrmax   in/out  position in iord of the selected entry; may be decreased.
920 template <class Float>
921 static void rdqpsrt(int *limit, int *last, int *maxerr, Float *ermax,
922  Float *elist, int *iord, int *nrmax) {
923  int i, j, k, ido, jbnd, isucc, jupbn;
924  Float errmin, errmax;
// Shift both arrays so that the 1-based indices used below are valid.
925  --iord;
926  --elist;
927 
// With at most two entries the ordering is fixed.
928  if (*last <= 2) {
929  iord[1] = 1;
930  iord[2] = 2;
931  goto Last;
932  }
933 
// Move the entry at position *nrmax towards the front while its value
// exceeds that of its predecessor in iord.
934  errmax = elist[*maxerr];
935  if (*nrmax > 1) {
936  ido = *nrmax - 1;
937  for (i = 1; i <= ido; ++i) {
938  isucc = iord[*nrmax - 1];
939  if (errmax <= elist[isucc]) break;
940  iord[*nrmax] = isucc;
941  --(*nrmax);
942  }
943  }
944 
// Number of positions of iord that are kept ordered; it shrinks once
// *last exceeds limit / 2 + 2.
945  if (*last > *limit / 2 + 2)
946  jupbn = *limit + 3 - *last;
947  else
948  jupbn = *last;
949 
950  errmin = elist[*last];
951 
// Scan forward for the slot of errmax; once found, scan backward from
// jbnd for the slot of errmin (the newest entry, index *last).
952  jbnd = jupbn - 1;
953  for (i = *nrmax + 1; i <= jbnd; ++i) {
954  isucc = iord[i];
955  if (errmax >= elist[isucc]) {
956  iord[i - 1] = *maxerr;
957  for (j = i, k = jbnd; j <= jbnd; j++, k--) {
958  isucc = iord[k];
959  if (errmin < elist[isucc]) {
960  iord[k + 1] = *last;
961  goto Last;
962  }
963  iord[k + 1] = isucc;
964  }
965  iord[i] = *last;
966  goto Last;
967  }
968  iord[i - 1] = isucc;
969  }
970 
// errmax is smaller than every scanned value: both entries go to the end.
971  iord[jbnd] = *maxerr;
972  iord[jupbn] = *last;
973 
974 Last:
975 
976  *maxerr = iord[*nrmax];
977  *ermax = elist[*maxerr];
978  return;
979 }
980 
// Settings consumed by Integral's constructor: `subdivisions` is passed to
// setWorkspace(), `reltol` and `abstol` are passed to setAccuracy().
// The constructor is only declared here; its definition lives elsewhere.
988 struct control {
// Number of subdivisions (declared default: 100).
989  int subdivisions;
// Relative tolerance (declared default: 1e-4).
990  double reltol;
// Absolute tolerance (declared default: 1e-4).
991  double abstol;
992  control(int subdivisions_ = 100, double reltol_ = 1e-4,
993  double abstol_ = 1e-4);
994 };
995 
1010 template <class Integrand>
1011 struct Integral {
1012  typedef typename Integrand::Scalar Type;
1013 
1014  struct vectorized_integrand {
1015  Integrand f;
1016  vectorized_integrand(Integrand f_) : f(f_) {}
1017  void operator()(Type *x, int n, void *ex) {
1018  for (int i = 0; i < n; i++) x[i] = f(x[i]);
1019  }
1020  } fn;
1022  Integrand &integrand() { return fn.f; }
1023 
1024  Type epsabs, epsrel, result, abserr;
1025  int neval, ier, limit, lenw, last;
1026  std::vector<int> iwork;
1027  std::vector<Type> work;
1028  void setAccuracy(double epsrel_ = 1e-4, double epsabs_ = 1e-4) {
1029  epsabs = epsabs_;
1030  epsrel = epsrel_;
1031  result = 0;
1032  abserr = 1e4;
1033  neval = 0;
1034  ier = 0;
1035  last = 0;
1036  }
1037  void setWorkspace(int subdivisions = 100) {
1038  limit = subdivisions;
1039  lenw = 4 * limit;
1040  iwork.resize(limit);
1041  work.resize(lenw);
1042  }
1043  Type a, b, bound;
1044  int inf;
1045  void setBounds(Type a_, Type b_) {
1046  int a_finite = (a_ != -INFINITY) && (a_ != INFINITY);
1047  int b_finite = (b_ != -INFINITY) && (b_ != INFINITY);
1048  if (a_finite && b_finite) {
1049  inf = 0;
1050  a = a_;
1051  b = b_;
1052  } else if (a_finite && !b_finite) {
1053  inf = 1;
1054  bound = a_;
1055  } else if (!a_finite && b_finite) {
1056  inf = -1;
1057  bound = b_;
1058  } else {
1059  inf = 2;
1060  }
1061  }
1068  Integral(Integrand f_, Type a_, Type b_, control c = control()) : fn(f_) {
1069  setAccuracy(c.reltol, c.abstol);
1070  setWorkspace(c.subdivisions);
1071  setBounds(a_, b_);
1072  }
1073  Type operator()() {
1074  if (inf)
1075  Rdqagi(fn, NULL, &bound, &inf, &epsabs, &epsrel, &result, &abserr, &neval,
1076  &ier, &limit, &lenw, &last, &iwork[0], &work[0]);
1077  else
1078  Rdqags(fn, NULL, &a, &b, &epsabs, &epsrel, &result, &abserr, &neval, &ier,
1079  &limit, &lenw, &last, &iwork[0], &work[0]);
1080  return result;
1081  }
1082 };
1083 
1110 template <class Integrand>
1111 typename Integrand::Scalar integrate(Integrand f,
1112  typename Integrand::Scalar a = -INFINITY,
1113  typename Integrand::Scalar b = INFINITY,
1114  control c = control()) {
1115  Integral<Integrand> I(f, a, b, c);
1116  return I();
1117 }
1118 
1133 template <class Integrand>
1134 struct mvIntegral {
1135  typedef typename Integrand::Scalar Scalar;
1136  struct evaluator {
1137  typedef typename Integrand::Scalar Scalar;
1138  Integrand &f;
1139  Scalar &x;
1140  evaluator(Integrand &f_, Scalar &x_) : f(f_), x(x_) {}
1141  Scalar operator()(const Scalar &x_) {
1142  x = x_;
1143  return f();
1144  }
1145  } ev;
1146  control c;
1148  mvIntegral(Integrand &f_, Scalar &x_, Scalar a = -INFINITY,
1149  Scalar b = INFINITY, control c_ = control())
1150  : ev(f_, x_), c(c_), I(ev, a, b, c_) {}
1151  Scalar operator()() { return I(); }
1153  mvIntegral<mvIntegral> wrt(Scalar &x, Scalar a = -INFINITY,
1154  Scalar b = INFINITY) {
1155  return mvIntegral<mvIntegral>(*this, x, a, b, c);
1156  }
1157 };
1158 
1159 template <class Integrand>
1160 struct mvIntegral0 {
1161  typedef typename Integrand::Scalar Scalar;
1162  Integrand &f;
1163  control c;
1164  mvIntegral0(Integrand &f_, control c_) : f(f_), c(c_) {}
1166  mvIntegral<Integrand> wrt(Scalar &x, Scalar a = -INFINITY,
1167  Scalar b = INFINITY) {
1168  return mvIntegral<Integrand>(f, x, a, b, c);
1169  }
1170 };
1198 template <class Integrand>
1199 mvIntegral0<Integrand> mvIntegrate(Integrand &f, control c = control()) {
1200  return mvIntegral0<Integrand>(f, c);
1201 }
1202 
1203 } // namespace TMBad
1204 #endif // HAVE_INTEGRATE_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
mvIntegral< mvIntegral > wrt(Scalar &x, Scalar a=-INFINITY, Scalar b=INFINITY)
With respect to.
Integral(Integrand f_, Type a_, Type b_, control c=control())
Constructor.
Integrand & integrand()
Return reference to integrand so the user can change parameters.
diff --git a/TMBad_2vectorize_8hpp_source.html b/TMBad_2vectorize_8hpp_source.html index dcf405f77..957202b05 100644 --- a/TMBad_2vectorize_8hpp_source.html +++ b/TMBad_2vectorize_8hpp_source.html @@ -73,7 +73,7 @@
TMBad/vectorize.hpp
-
1 #ifndef HAVE_VECTORIZE_HPP
2 #define HAVE_VECTORIZE_HPP
3 // Autogenerated - do not edit by hand !
4 
5 namespace TMBad {
6 
7 typedef global::ad_segment ad_segment;
8 
// Thin wrapper around a value of `Type` that carries two compile-time stride
// flags, one per operator input (S0 for input 0, S1 for input 1).
template <class Type, bool S0 = 0, bool S1 = 0>
struct Vectorized {
  Type x;

  // Stride flag of input j: S0 when j == 0, otherwise S1.
  static constexpr bool stride(bool j) { return j == 0 ? S0 : S1; }
  // Read access to the wrapped value. Declared const so that const objects
  // can be converted as well (the original operator was non-const).
  operator Type() const { return x; }
  Vectorized(Type x) : x(x) {}
  // Leaves x default-initialised.
  Vectorized() {}
};
18 
19 template <class Type, bool S0, bool S1>
20 struct ForwardArgs<Vectorized<Type, S0, S1> > : ForwardArgs<Type> {
21  typedef Vectorized<Type, S0, S1> T;
22  typedef ForwardArgs<Type> Base;
23  size_t k;
25  Type x(bool j) const {
26  return Base::values[Base::input(j) + k * T::stride(j)];
27  }
29  Type &y(Index j) { return Base::values[Base::output(j) + k]; }
30  ForwardArgs(const Base &x) : Base(x) {}
31 };
32 
33 template <class Type, bool S0, bool S1>
34 struct ReverseArgs<Vectorized<Type, S0, S1> > : ReverseArgs<Type> {
35  typedef Vectorized<Type, S0, S1> T;
36  typedef ReverseArgs<Type> Base;
37  size_t k;
39  Type x(bool j) const {
40  return Base::values[Base::input(j) + k * T::stride(j)];
41  }
43  Type y(Index j) const { return Base::values[Base::output(j) + k]; }
46  Type &dx(bool j) const {
47  return Base::derivs[Base::input(j) + k * T::stride(j)];
48  }
51  Type dy(Index j) const { return Base::derivs[Base::output(j) + k]; }
52  ReverseArgs(const Base &x) : Base(x) {}
53 };
54 
55 struct VSumOp : global::DynamicOperator<1, 1> {
56  static const bool is_linear = true;
57  size_t n;
58  VSumOp(size_t n);
59  template <class Type>
60  void forward(ForwardArgs<Type> &args) {
61  const Type *x = args.x_ptr(0);
62  Type &y = args.y(0);
63  y = 0;
64  for (size_t i = 0; i < n; i++) y += x[i];
65  }
66  template <class Type>
67  void reverse(ReverseArgs<Type> &args) {
68  Type *dx = args.dx_ptr(0);
69  const Type &dy = args.dy(0);
70  for (size_t i = 0; i < n; i++) dx[i] += dy;
71  }
72 
73  void dependencies(Args<> &args, Dependencies &dep) const;
74  static const bool have_dependencies = true;
76  static const bool implicit_dependencies = true;
78  static const bool allow_remap = false;
79  void forward(ForwardArgs<Writer> &args);
80  void reverse(ReverseArgs<Writer> &args);
81  const char *op_name();
82 };
83 
84 ad_aug sum(ad_segment x);
85 
86 template <class dummy = void>
87 ad_segment operator/(ad_segment x, ad_segment y);
88 template <class dummy = void>
89 ad_segment operator*(ad_segment x, ad_segment y);
90 template <class dummy = void>
91 ad_segment operator+(ad_segment x, ad_segment y);
92 template <class dummy = void>
93 ad_segment operator-(ad_segment x, ad_segment y);
94 template <class dummy = void>
95 ad_segment operator-(ad_segment x);
96 template <class dummy = void>
97 ad_segment &operator+=(ad_segment &x, ad_segment y) {
98  if ((x.size() == 1) && (x.size() < y.size())) y = ad_segment(sum(y), 1);
99  if (x.identicalZero())
100  x = y;
101  else
102  x = x + y;
103  return x;
104 }
105 template <class dummy = void>
106 ad_segment &operator-=(ad_segment &x, ad_segment y) {
107  if ((x.size() == 1) && (x.size() < y.size())) y = ad_segment(sum(y), 1);
108  if (x.identicalZero())
109  x = -y;
110  else
111  x = x - y;
112  return x;
113 }
114 
115 template <class Operator, bool S0 = false, bool S1 = false>
116 struct Vectorize : global::DynamicOperator<Operator::ninput, -1> {
117  size_t n;
118  static const bool have_input_size_output_size = true;
119  Index input_size() const { return Operator::ninput; }
120  Index output_size() const { return this->n; }
121  Vectorize(size_t n) : n(n) {}
122  void forward(ForwardArgs<Scalar> &args) {
123  ForwardArgs<Vectorized<Scalar, S0, S1> > vargs(args);
124  typename global::CPL<Operator>::type Op;
125  for (vargs.k = 0; vargs.k < n; vargs.k++) {
126  Op.forward(vargs);
127  }
128  }
129  void forward(ForwardArgs<Replay> &args) {
130  ad_segment x0(args.x_ptr(0), (S0 ? n : 1));
131  ad_segment x1;
132  if (Operator::ninput > 1) {
133  x1 = ad_segment(args.x_ptr(1), (S1 ? n : 1));
134  }
135  global::Complete<Vectorize> F(*this);
136  ad_segment y = F(x0, x1);
137  for (size_t i = 0; i < y.size(); i++) args.y(i) = y[i];
138  }
139  void reverse(ReverseArgs<Scalar> &args) {
140  ReverseArgs<Vectorized<Scalar, S0, S1> > vargs(args);
141  typename global::CPL<Operator>::type Op;
142  for (vargs.k = 0; vargs.k < n; vargs.k++) {
143  Op.reverse(vargs);
144  }
145  }
146  void reverse(ReverseArgs<Replay> &args) {
147  std::vector<ad_segment> v;
148  std::vector<ad_segment> d;
149  std::vector<Index> i;
150  ad_segment zero;
151 
152  v.push_back(ad_segment(args.x_ptr(0), (S0 ? n : 1)));
153  d.push_back(zero);
154  i.push_back(i.size());
155  if (Operator::ninput > 1) {
156  v.push_back(ad_segment(args.x_ptr(1), (S1 ? n : 1)));
157  d.push_back(zero);
158  i.push_back(i.size());
159  }
160 
161  v.push_back(ad_segment(args.y_ptr(0), n));
162  d.push_back(ad_segment(args.dy_ptr(0), n));
163 
164  ReverseArgs<ad_segment> vargs(i, v, d);
165 
166  vargs.ptr.first = 0;
167  vargs.ptr.second = Operator::ninput;
168  typename global::CPL<Operator>::type Op;
169  Op.reverse(vargs);
170 
171  ad_segment dx_left(args.dx_ptr(0), (S0 ? n : 1), true);
172  dx_left += vargs.dx(0);
173 
174  for (size_t i = 0; i < dx_left.size(); i++) args.dx_ptr(0)[i] = dx_left[i];
175  if (Operator::ninput > 1) {
176  ad_segment dx_right(args.dx_ptr(1), (S1 ? n : 1), true);
177  dx_right += vargs.dx(1);
178 
179  for (size_t i = 0; i < dx_right.size(); i++)
180  args.dx_ptr(1)[i] = dx_right[i];
181  }
182  }
183 
184  void dependencies(Args<> &args, Dependencies &dep) const {
185  dep.add_segment(args.input(0), (S0 ? n : 1));
186  if (Operator::ninput == 2) {
187  dep.add_segment(args.input(1), (S1 ? n : 1));
188  }
189  }
190  static const bool have_dependencies = true;
192  static const bool implicit_dependencies = true;
194  static const bool allow_remap = false;
195  void forward(ForwardArgs<Writer> &args) { TMBAD_ASSERT(false); }
196  void reverse(ReverseArgs<Writer> &args) { TMBAD_ASSERT(false); }
197  const char *op_name() {
198  global::Complete<Operator> Op;
199  static const std::string name = std::string("V") + Op.op_name();
200  return name.c_str();
201  }
202  Vectorize(const ad_segment &x, const ad_segment &y)
203  : n(std::max(x.size(), y.size())) {}
204 };
205 template <class dummy>
206 ad_segment operator/(ad_segment x, ad_segment y) {
207  size_t n = std::max(x.size(), y.size());
208  if (x.size() > 1 && y.size() > 1) {
209  global::Complete<Vectorize<global::ad_plain::DivOp, 1, 1> > F(n);
210  return F(x, y);
211  } else if (x.size() > 1) {
212  global::Complete<Vectorize<global::ad_plain::DivOp, 1, 0> > F(n);
213  return F(x, y);
214  } else if (y.size() > 1) {
215  global::Complete<Vectorize<global::ad_plain::DivOp, 0, 1> > F(n);
216  return F(x, y);
217  } else {
218  global::Complete<Vectorize<global::ad_plain::DivOp, 0, 0> > F(n);
219  return F(x, y);
220  }
221  TMBAD_ASSERT(false);
222  return ad_segment();
223 }
224 template <class dummy>
225 ad_segment operator*(ad_segment x, ad_segment y) {
226  size_t n = std::max(x.size(), y.size());
227  if (x.size() > 1 && y.size() > 1) {
228  global::Complete<Vectorize<global::ad_plain::MulOp, 1, 1> > F(n);
229  return F(x, y);
230  } else if (x.size() > 1) {
231  global::Complete<Vectorize<global::ad_plain::MulOp, 1, 0> > F(n);
232  return F(x, y);
233  } else if (y.size() > 1) {
234  global::Complete<Vectorize<global::ad_plain::MulOp, 0, 1> > F(n);
235  return F(x, y);
236  } else {
237  global::Complete<Vectorize<global::ad_plain::MulOp, 0, 0> > F(n);
238  return F(x, y);
239  }
240  TMBAD_ASSERT(false);
241  return ad_segment();
242 }
243 template <class dummy>
244 ad_segment operator+(ad_segment x, ad_segment y) {
245  size_t n = std::max(x.size(), y.size());
246  if (x.size() > 1 && y.size() > 1) {
247  global::Complete<Vectorize<global::ad_plain::AddOp, 1, 1> > F(n);
248  return F(x, y);
249  } else if (x.size() > 1) {
250  global::Complete<Vectorize<global::ad_plain::AddOp, 1, 0> > F(n);
251  return F(x, y);
252  } else if (y.size() > 1) {
253  global::Complete<Vectorize<global::ad_plain::AddOp, 0, 1> > F(n);
254  return F(x, y);
255  } else {
256  global::Complete<Vectorize<global::ad_plain::AddOp, 0, 0> > F(n);
257  return F(x, y);
258  }
259  TMBAD_ASSERT(false);
260  return ad_segment();
261 }
262 template <class dummy>
263 ad_segment operator-(ad_segment x, ad_segment y) {
264  size_t n = std::max(x.size(), y.size());
265  if (x.size() > 1 && y.size() > 1) {
266  global::Complete<Vectorize<global::ad_plain::SubOp, 1, 1> > F(n);
267  return F(x, y);
268  } else if (x.size() > 1) {
269  global::Complete<Vectorize<global::ad_plain::SubOp, 1, 0> > F(n);
270  return F(x, y);
271  } else if (y.size() > 1) {
272  global::Complete<Vectorize<global::ad_plain::SubOp, 0, 1> > F(n);
273  return F(x, y);
274  } else {
275  global::Complete<Vectorize<global::ad_plain::SubOp, 0, 0> > F(n);
276  return F(x, y);
277  }
278  TMBAD_ASSERT(false);
279  return ad_segment();
280 }
281 template <class dummy = void>
282 ad_segment pow(ad_segment x, ad_segment y);
283 template <class dummy>
284 ad_segment pow(ad_segment x, ad_segment y) {
285  size_t n = std::max(x.size(), y.size());
286  if (x.size() > 1 && y.size() > 1) {
287  global::Complete<Vectorize<PowOp, 1, 1> > F(n);
288  return F(x, y);
289  } else if (x.size() > 1) {
290  global::Complete<Vectorize<PowOp, 1, 0> > F(n);
291  return F(x, y);
292  } else if (y.size() > 1) {
293  global::Complete<Vectorize<PowOp, 0, 1> > F(n);
294  return F(x, y);
295  } else {
296  global::Complete<Vectorize<PowOp, 0, 0> > F(n);
297  return F(x, y);
298  }
299  TMBAD_ASSERT(false);
300  return ad_segment();
301 }
302 template <class dummy>
303 ad_segment operator-(ad_segment x) {
304  size_t n = x.size();
305  global::Complete<Vectorize<global::ad_plain::NegOp, 1, 0> > F(n);
306  return F(x);
307 }
308 
309 template <class dummy = void>
310 ad_segment fabs(ad_segment x) {
311  size_t n = x.size();
312  global::Complete<Vectorize<AbsOp, 1, 0> > F(n);
313  return F(x);
314 }
315 template <class dummy = void>
316 ad_segment sin(ad_segment x) {
317  size_t n = x.size();
318  global::Complete<Vectorize<SinOp, 1, 0> > F(n);
319  return F(x);
320 }
321 template <class dummy = void>
322 ad_segment cos(ad_segment x) {
323  size_t n = x.size();
324  global::Complete<Vectorize<CosOp, 1, 0> > F(n);
325  return F(x);
326 }
327 template <class dummy = void>
328 ad_segment exp(ad_segment x) {
329  size_t n = x.size();
330  global::Complete<Vectorize<ExpOp, 1, 0> > F(n);
331  return F(x);
332 }
333 template <class dummy = void>
334 ad_segment log(ad_segment x) {
335  size_t n = x.size();
336  global::Complete<Vectorize<LogOp, 1, 0> > F(n);
337  return F(x);
338 }
339 template <class dummy = void>
340 ad_segment sqrt(ad_segment x) {
341  size_t n = x.size();
342  global::Complete<Vectorize<SqrtOp, 1, 0> > F(n);
343  return F(x);
344 }
345 template <class dummy = void>
346 ad_segment tan(ad_segment x) {
347  size_t n = x.size();
348  global::Complete<Vectorize<TanOp, 1, 0> > F(n);
349  return F(x);
350 }
351 template <class dummy = void>
352 ad_segment sinh(ad_segment x) {
353  size_t n = x.size();
354  global::Complete<Vectorize<SinhOp, 1, 0> > F(n);
355  return F(x);
356 }
357 template <class dummy = void>
358 ad_segment cosh(ad_segment x) {
359  size_t n = x.size();
360  global::Complete<Vectorize<CoshOp, 1, 0> > F(n);
361  return F(x);
362 }
363 template <class dummy = void>
364 ad_segment tanh(ad_segment x) {
365  size_t n = x.size();
366  global::Complete<Vectorize<TanhOp, 1, 0> > F(n);
367  return F(x);
368 }
369 template <class dummy = void>
370 ad_segment expm1(ad_segment x) {
371  size_t n = x.size();
372  global::Complete<Vectorize<Expm1, 1, 0> > F(n);
373  return F(x);
374 }
375 template <class dummy = void>
376 ad_segment log1p(ad_segment x) {
377  size_t n = x.size();
378  global::Complete<Vectorize<Log1p, 1, 0> > F(n);
379  return F(x);
380 }
381 template <class dummy = void>
382 ad_segment asin(ad_segment x) {
383  size_t n = x.size();
384  global::Complete<Vectorize<AsinOp, 1, 0> > F(n);
385  return F(x);
386 }
387 template <class dummy = void>
388 ad_segment acos(ad_segment x) {
389  size_t n = x.size();
390  global::Complete<Vectorize<AcosOp, 1, 0> > F(n);
391  return F(x);
392 }
393 template <class dummy = void>
394 ad_segment atan(ad_segment x) {
395  size_t n = x.size();
396  global::Complete<Vectorize<AtanOp, 1, 0> > F(n);
397  return F(x);
398 }
399 template <class T>
400 struct ScalarPack {
401  static const int size = (sizeof(T) - 1) / sizeof(Scalar) + 1;
402 };
403 
// Reference to a segment, stored as a `global` pointer together with an
// offset and a size. All member functions are only declared here; their
// definitions live elsewhere.
406 struct SegmentRef {
// NOTE(review): presumably the `global` object that owns the segment -- confirm.
407  global *glob_ptr;
// NOTE(review): presumably the start position of the segment -- confirm.
408  Index offset;
// Number of elements in the segment.
409  Index size;
410  Scalar *value_ptr();
411  Scalar *deriv_ptr();
412  SegmentRef();
413  SegmentRef(const Scalar *x);
414  SegmentRef(global *g, Index o, Index s);
415  SegmentRef(const ad_segment &x);
416  bool isNull();
417  void resize(ad_segment &pack, Index n);
418 };
419 
420 ad_segment pack(const ad_segment &x);
421 ad_segment unpack(const ad_segment &x);
422 
// Operator derived from DynamicOperator<1, K>, with
// K = ScalarPack<SegmentRef>::size.
// NOTE(review): the base arguments presumably mean one input and K
// outputs -- confirm. Only the Scalar and Replay sweeps are supported;
// any other argument type hits the asserting templates at the bottom.
435 struct PackOp : global::DynamicOperator<1, ScalarPack<SegmentRef>::size> {
// Number of Scalar slots occupied by one SegmentRef.
437  static const Index K = ScalarPack<SegmentRef>::size;
// NOTE(review): presumably the length of the packed segment -- confirm.
439  Index n;
440  PackOp(const Index n);
442  void forward(ForwardArgs<Scalar> &args);
444  void forward(ForwardArgs<Replay> &args);
446  void reverse(ReverseArgs<Scalar> &args);
448  void reverse(ReverseArgs<Replay> &args);
449  const char *op_name();
451  static const bool allow_remap = false;
452  static const bool have_dependencies = true;
453  static const bool implicit_dependencies = true;
454  void dependencies(Args<> &args, Dependencies &dep) const;
455 
// Fallbacks for every other argument type: always assert.
456  template <class T>
457  void forward(ForwardArgs<T> &args) {
458  TMBAD_ASSERT2(false, "PackOp: Invalid method!");
459  }
460  template <class T>
461  void reverse(ReverseArgs<T> &args) {
462  TMBAD_ASSERT2(false, "PackOp: Invalid method!");
463  }
464 };
465 
// NOTE(review): the line that opens this struct is missing from this listing
// (the numbering jumps from 465 to 469). The constructor name and the
// assertion messages below show that these are the members of UnpkOp.
// Number of Scalar slots occupied by one SegmentRef.
469  static const Index K = ScalarPack<SegmentRef>::size;
// NOTE(review): presumably the number of unpacked output values -- confirm.
471  Index noutput;
472  UnpkOp(const Index n);
474  void forward(ForwardArgs<Scalar> &args);
475  static const bool add_forward_replay_copy = true;
477  void reverse(ReverseArgs<Scalar> &args);
479  void reverse(ReverseArgs<Replay> &args);
480  const char *op_name();
481 
483  static const bool allow_remap = false;
484  static const bool have_dependencies = true;
485  static const bool implicit_dependencies = true;
486  void dependencies(Args<> &args, Dependencies &dep) const;
487 
// Fallbacks for every other argument type: always assert.
488  template <class T>
489  void forward(ForwardArgs<T> &args) {
490  TMBAD_ASSERT2(false, "UnpkOp: Invalid method!");
491  }
492  template <class T>
493  void reverse(ReverseArgs<T> &args) {
494  TMBAD_ASSERT2(false, "UnpkOp: Invalid method!");
495  }
496 };
497 
499 ad_segment pack(const ad_segment &x);
500 
502 ad_segment unpack(const ad_segment &x);
503 
505 template <class T>
506 ad_segment unpack(const std::vector<T> &x, Index j) {
507  Index K = ScalarPack<SegmentRef>::size;
508  ad_segment x_(x[j * K], K);
509  return unpack(x_);
510 }
511 Scalar *unpack(const std::vector<Scalar> &x, Index j);
512 
513 template <class T>
514 std::vector<T> repack(const std::vector<T> &x) {
515  Index K = ScalarPack<SegmentRef>::size;
516  size_t n = x.size() / K;
517  std::vector<T> y;
518  for (size_t j = 0; j < n; j++) {
519  ad_segment x_(x[j * K], K);
520  SegmentRef sr(x_);
521  ad_segment orig(sr.offset, sr.size);
522  ad_segment yj = pack(orig);
523  for (size_t i = 0; i < K; i++) y.push_back(yj[i]);
524  }
525  return y;
526 }
527 
528 std::vector<ad_aug> concat(const std::vector<ad_segment> &x);
529 
530 } // namespace TMBad
531 #endif // HAVE_VECTORIZE_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+
1 #ifndef HAVE_VECTORIZE_HPP
2 #define HAVE_VECTORIZE_HPP
3 // Autogenerated - do not edit by hand !
4 
5 namespace TMBad {
6 
7 typedef global::ad_segment ad_segment;
8 
// Thin wrapper around a value of `Type` that carries two compile-time stride
// flags, one per operator input (S0 for input 0, S1 for input 1).
template <class Type, bool S0 = 0, bool S1 = 0>
struct Vectorized {
  Type x;

  // Stride flag of input j: S0 when j == 0, otherwise S1.
  static constexpr bool stride(bool j) { return j == 0 ? S0 : S1; }
  // Read access to the wrapped value. Declared const so that const objects
  // can be converted as well (the original operator was non-const).
  operator Type() const { return x; }
  Vectorized(Type x) : x(x) {}
  // Leaves x default-initialised.
  Vectorized() {}
};
18 
19 template <class Type, bool S0, bool S1>
20 struct ForwardArgs<Vectorized<Type, S0, S1> > : ForwardArgs<Type> {
21  typedef Vectorized<Type, S0, S1> T;
22  typedef ForwardArgs<Type> Base;
23  size_t k;
25  Type x(bool j) const {
26  return Base::values[Base::input(j) + k * T::stride(j)];
27  }
29  Type &y(Index j) { return Base::values[Base::output(j) + k]; }
30  ForwardArgs(const Base &x) : Base(x) {}
31 };
32 
33 template <class Type, bool S0, bool S1>
34 struct ReverseArgs<Vectorized<Type, S0, S1> > : ReverseArgs<Type> {
35  typedef Vectorized<Type, S0, S1> T;
36  typedef ReverseArgs<Type> Base;
37  size_t k;
39  Type x(bool j) const {
40  return Base::values[Base::input(j) + k * T::stride(j)];
41  }
43  Type y(Index j) const { return Base::values[Base::output(j) + k]; }
46  Type &dx(bool j) const {
47  return Base::derivs[Base::input(j) + k * T::stride(j)];
48  }
51  Type dy(Index j) const { return Base::derivs[Base::output(j) + k]; }
52  ReverseArgs(const Base &x) : Base(x) {}
53 };
54 
55 struct VSumOp : global::DynamicOperator<1, 1> {
56  static const bool is_linear = true;
57  size_t n;
58  VSumOp(size_t n);
59  template <class Type>
60  void forward(ForwardArgs<Type> &args) {
61  const Type *x = args.x_ptr(0);
62  Type &y = args.y(0);
63  y = 0;
64  for (size_t i = 0; i < n; i++) y += x[i];
65  }
66  template <class Type>
67  void reverse(ReverseArgs<Type> &args) {
68  Type *dx = args.dx_ptr(0);
69  const Type &dy = args.dy(0);
70  for (size_t i = 0; i < n; i++) dx[i] += dy;
71  }
72 
73  void dependencies(Args<> &args, Dependencies &dep) const;
74  static const bool have_dependencies = true;
76  static const bool implicit_dependencies = true;
78  static const bool allow_remap = false;
79  void forward(ForwardArgs<Writer> &args);
80  void reverse(ReverseArgs<Writer> &args);
81  const char *op_name();
82 };
83 
84 ad_aug sum(ad_segment x);
85 
86 template <class dummy = void>
87 ad_segment operator/(ad_segment x, ad_segment y);
88 template <class dummy = void>
89 ad_segment operator*(ad_segment x, ad_segment y);
90 template <class dummy = void>
91 ad_segment operator+(ad_segment x, ad_segment y);
92 template <class dummy = void>
93 ad_segment operator-(ad_segment x, ad_segment y);
94 template <class dummy = void>
95 ad_segment operator-(ad_segment x);
96 template <class dummy = void>
97 ad_segment &operator+=(ad_segment &x, ad_segment y) {
98  if ((x.size() == 1) && (x.size() < y.size())) y = ad_segment(sum(y), 1);
99  if (x.identicalZero())
100  x = y;
101  else
102  x = x + y;
103  return x;
104 }
105 template <class dummy = void>
106 ad_segment &operator-=(ad_segment &x, ad_segment y) {
107  if ((x.size() == 1) && (x.size() < y.size())) y = ad_segment(sum(y), 1);
108  if (x.identicalZero())
109  x = -y;
110  else
111  x = x - y;
112  return x;
113 }
114 
// Operator adaptor that applies `Operator` elementwise `n` times.
// S0 / S1 select how input 0 / input 1 is sized: a segment of length n when
// true, a single element when false (see the `(S0 ? n : 1)` expressions).
115 template <class Operator, bool S0 = false, bool S1 = false>
116 struct Vectorize : global::DynamicOperator<Operator::ninput, -1> {
// Number of elements processed; also the reported output size.
117  size_t n;
118  static const bool have_input_size_output_size = true;
119  Index input_size() const { return Operator::ninput; }
120  Index output_size() const { return this->n; }
121  Vectorize(size_t n) : n(n) {}
// Numeric forward pass: run the scalar operator once per index k in [0, n).
122  void forward(ForwardArgs<Scalar> &args) {
123  ForwardArgs<Vectorized<Scalar, S0, S1> > vargs(args);
124  typename global::CPL<Operator>::type Op;
125  for (vargs.k = 0; vargs.k < n; vargs.k++) {
126  Op.forward(vargs);
127  }
128  }
// Replay forward pass: wrap the inputs as segments, call a copy of this
// operator on them and copy the resulting segment into the outputs.
129  void forward(ForwardArgs<Replay> &args) {
130  ad_segment x0(args.x_ptr(0), (S0 ? n : 1));
131  ad_segment x1;
// x1 stays default-constructed for unary operators.
132  if (Operator::ninput > 1) {
133  x1 = ad_segment(args.x_ptr(1), (S1 ? n : 1));
134  }
135  global::Complete<Vectorize> F(*this);
136  ad_segment y = F(x0, x1);
137  for (size_t i = 0; i < y.size(); i++) args.y(i) = y[i];
138  }
// Numeric reverse pass: run the scalar operator's reverse once per index k.
139  void reverse(ReverseArgs<Scalar> &args) {
140  ReverseArgs<Vectorized<Scalar, S0, S1> > vargs(args);
141  typename global::CPL<Operator>::type Op;
142  for (vargs.k = 0; vargs.k < n; vargs.k++) {
143  Op.reverse(vargs);
144  }
145  }
// Replay reverse pass: evaluates the operator's reverse with ad_segment
// arguments. `v` collects value segments, `d` the matching derivative
// segments and `i` the input index table handed to ReverseArgs<ad_segment>.
146  void reverse(ReverseArgs<Replay> &args) {
147  std::vector<ad_segment> v;
148  std::vector<ad_segment> d;
149  std::vector<Index> i;
150  ad_segment zero;
151 
// Input 0: value segment plus a default-constructed derivative placeholder.
152  v.push_back(ad_segment(args.x_ptr(0), (S0 ? n : 1)));
153  d.push_back(zero);
154  i.push_back(i.size());
155  if (Operator::ninput > 1) {
156  v.push_back(ad_segment(args.x_ptr(1), (S1 ? n : 1)));
157  d.push_back(zero);
158  i.push_back(i.size());
159  }
160 
// Output: value segment and incoming derivative segment, both of length n.
161  v.push_back(ad_segment(args.y_ptr(0), n));
162  d.push_back(ad_segment(args.dy_ptr(0), n));
163 
164  ReverseArgs<ad_segment> vargs(i, v, d);
165 
// Inputs start at position 0; the output entry sits right after the inputs.
166  vargs.ptr.first = 0;
167  vargs.ptr.second = Operator::ninput;
168  typename global::CPL<Operator>::type Op;
169  Op.reverse(vargs);
170 
// Accumulate the computed input derivatives into the caller's dx storage.
171  ad_segment dx_left(args.dx_ptr(0), (S0 ? n : 1), true);
172  dx_left += vargs.dx(0);
173 
174  for (size_t i = 0; i < dx_left.size(); i++) args.dx_ptr(0)[i] = dx_left[i];
175  if (Operator::ninput > 1) {
176  ad_segment dx_right(args.dx_ptr(1), (S1 ? n : 1), true);
177  dx_right += vargs.dx(1);
178 
179  for (size_t i = 0; i < dx_right.size(); i++)
180  args.dx_ptr(1)[i] = dx_right[i];
181  }
182  }
183 
// Report each input as a contiguous segment of length n (vector input) or 1.
184  void dependencies(Args<> &args, Dependencies &dep) const {
185  dep.add_segment(args.input(0), (S0 ? n : 1));
186  if (Operator::ninput == 2) {
187  dep.add_segment(args.input(1), (S1 ? n : 1));
188  }
189  }
190  static const bool have_dependencies = true;
192  static const bool implicit_dependencies = true;
194  static const bool allow_remap = false;
// Source-code generation (Writer) is not provided for this operator.
195  void forward(ForwardArgs<Writer> &args) { TMBAD_ASSERT(false); }
196  void reverse(ReverseArgs<Writer> &args) { TMBAD_ASSERT(false); }
// Name is "V" followed by the wrapped operator's name; built once per
// template instantiation and cached in a function-local static.
197  const char *op_name() {
198  global::Complete<Operator> Op;
199  static const std::string name = std::string("V") + Op.op_name();
200  return name.c_str();
201  }
// Construct from two operands: n is the larger of the two segment sizes.
202  Vectorize(const ad_segment &x, const ad_segment &y)
203  : n(std::max(x.size(), y.size())) {}
204 };
205 template <class dummy>
206 ad_segment operator/(ad_segment x, ad_segment y) {
207  size_t n = std::max(x.size(), y.size());
208  if (x.size() > 1 && y.size() > 1) {
209  global::Complete<Vectorize<global::ad_plain::DivOp, 1, 1> > F(n);
210  return F(x, y);
211  } else if (x.size() > 1) {
212  global::Complete<Vectorize<global::ad_plain::DivOp, 1, 0> > F(n);
213  return F(x, y);
214  } else if (y.size() > 1) {
215  global::Complete<Vectorize<global::ad_plain::DivOp, 0, 1> > F(n);
216  return F(x, y);
217  } else {
218  global::Complete<Vectorize<global::ad_plain::DivOp, 0, 0> > F(n);
219  return F(x, y);
220  }
221  TMBAD_ASSERT(false);
222  return ad_segment();
223 }
224 template <class dummy>
225 ad_segment operator*(ad_segment x, ad_segment y) {
226  size_t n = std::max(x.size(), y.size());
227  if (x.size() > 1 && y.size() > 1) {
228  global::Complete<Vectorize<global::ad_plain::MulOp, 1, 1> > F(n);
229  return F(x, y);
230  } else if (x.size() > 1) {
231  global::Complete<Vectorize<global::ad_plain::MulOp, 1, 0> > F(n);
232  return F(x, y);
233  } else if (y.size() > 1) {
234  global::Complete<Vectorize<global::ad_plain::MulOp, 0, 1> > F(n);
235  return F(x, y);
236  } else {
237  global::Complete<Vectorize<global::ad_plain::MulOp, 0, 0> > F(n);
238  return F(x, y);
239  }
240  TMBAD_ASSERT(false);
241  return ad_segment();
242 }
243 template <class dummy>
244 ad_segment operator+(ad_segment x, ad_segment y) {
245  size_t n = std::max(x.size(), y.size());
246  if (x.size() > 1 && y.size() > 1) {
247  global::Complete<Vectorize<global::ad_plain::AddOp, 1, 1> > F(n);
248  return F(x, y);
249  } else if (x.size() > 1) {
250  global::Complete<Vectorize<global::ad_plain::AddOp, 1, 0> > F(n);
251  return F(x, y);
252  } else if (y.size() > 1) {
253  global::Complete<Vectorize<global::ad_plain::AddOp, 0, 1> > F(n);
254  return F(x, y);
255  } else {
256  global::Complete<Vectorize<global::ad_plain::AddOp, 0, 0> > F(n);
257  return F(x, y);
258  }
259  TMBAD_ASSERT(false);
260  return ad_segment();
261 }
262 template <class dummy>
263 ad_segment operator-(ad_segment x, ad_segment y) {
264  size_t n = std::max(x.size(), y.size());
265  if (x.size() > 1 && y.size() > 1) {
266  global::Complete<Vectorize<global::ad_plain::SubOp, 1, 1> > F(n);
267  return F(x, y);
268  } else if (x.size() > 1) {
269  global::Complete<Vectorize<global::ad_plain::SubOp, 1, 0> > F(n);
270  return F(x, y);
271  } else if (y.size() > 1) {
272  global::Complete<Vectorize<global::ad_plain::SubOp, 0, 1> > F(n);
273  return F(x, y);
274  } else {
275  global::Complete<Vectorize<global::ad_plain::SubOp, 0, 0> > F(n);
276  return F(x, y);
277  }
278  TMBAD_ASSERT(false);
279  return ad_segment();
280 }
281 template <class dummy = void>
282 ad_segment pow(ad_segment x, ad_segment y);
283 template <class dummy>
284 ad_segment pow(ad_segment x, ad_segment y) {
285  size_t n = std::max(x.size(), y.size());
286  if (x.size() > 1 && y.size() > 1) {
287  global::Complete<Vectorize<PowOp, 1, 1> > F(n);
288  return F(x, y);
289  } else if (x.size() > 1) {
290  global::Complete<Vectorize<PowOp, 1, 0> > F(n);
291  return F(x, y);
292  } else if (y.size() > 1) {
293  global::Complete<Vectorize<PowOp, 0, 1> > F(n);
294  return F(x, y);
295  } else {
296  global::Complete<Vectorize<PowOp, 0, 0> > F(n);
297  return F(x, y);
298  }
299  TMBAD_ASSERT(false);
300  return ad_segment();
301 }
302 template <class dummy>
303 ad_segment operator-(ad_segment x) {
304  size_t n = x.size();
305  global::Complete<Vectorize<global::ad_plain::NegOp, 1, 0> > F(n);
306  return F(x);
307 }
308 
309 template <class dummy = void>
310 ad_segment fabs(ad_segment x) {
311  size_t n = x.size();
312  global::Complete<Vectorize<AbsOp, 1, 0> > F(n);
313  return F(x);
314 }
315 template <class dummy = void>
316 ad_segment sin(ad_segment x) {
317  size_t n = x.size();
318  global::Complete<Vectorize<SinOp, 1, 0> > F(n);
319  return F(x);
320 }
321 template <class dummy = void>
322 ad_segment cos(ad_segment x) {
323  size_t n = x.size();
324  global::Complete<Vectorize<CosOp, 1, 0> > F(n);
325  return F(x);
326 }
327 template <class dummy = void>
328 ad_segment exp(ad_segment x) {
329  size_t n = x.size();
330  global::Complete<Vectorize<ExpOp, 1, 0> > F(n);
331  return F(x);
332 }
333 template <class dummy = void>
334 ad_segment log(ad_segment x) {
335  size_t n = x.size();
336  global::Complete<Vectorize<LogOp, 1, 0> > F(n);
337  return F(x);
338 }
339 template <class dummy = void>
340 ad_segment sqrt(ad_segment x) {
341  size_t n = x.size();
342  global::Complete<Vectorize<SqrtOp, 1, 0> > F(n);
343  return F(x);
344 }
345 template <class dummy = void>
346 ad_segment tan(ad_segment x) {
347  size_t n = x.size();
348  global::Complete<Vectorize<TanOp, 1, 0> > F(n);
349  return F(x);
350 }
351 template <class dummy = void>
352 ad_segment sinh(ad_segment x) {
353  size_t n = x.size();
354  global::Complete<Vectorize<SinhOp, 1, 0> > F(n);
355  return F(x);
356 }
357 template <class dummy = void>
358 ad_segment cosh(ad_segment x) {
359  size_t n = x.size();
360  global::Complete<Vectorize<CoshOp, 1, 0> > F(n);
361  return F(x);
362 }
363 template <class dummy = void>
364 ad_segment tanh(ad_segment x) {
365  size_t n = x.size();
366  global::Complete<Vectorize<TanhOp, 1, 0> > F(n);
367  return F(x);
368 }
369 template <class dummy = void>
370 ad_segment expm1(ad_segment x) {
371  size_t n = x.size();
372  global::Complete<Vectorize<Expm1, 1, 0> > F(n);
373  return F(x);
374 }
375 template <class dummy = void>
376 ad_segment log1p(ad_segment x) {
377  size_t n = x.size();
378  global::Complete<Vectorize<Log1p, 1, 0> > F(n);
379  return F(x);
380 }
381 template <class dummy = void>
382 ad_segment asin(ad_segment x) {
383  size_t n = x.size();
384  global::Complete<Vectorize<AsinOp, 1, 0> > F(n);
385  return F(x);
386 }
387 template <class dummy = void>
388 ad_segment acos(ad_segment x) {
389  size_t n = x.size();
390  global::Complete<Vectorize<AcosOp, 1, 0> > F(n);
391  return F(x);
392 }
393 template <class dummy = void>
394 ad_segment atan(ad_segment x) {
395  size_t n = x.size();
396  global::Complete<Vectorize<AtanOp, 1, 0> > F(n);
397  return F(x);
398 }
399 template <class T>
400 struct ScalarPack {
401  static const int size = (sizeof(T) - 1) / sizeof(Scalar) + 1;
402 };
403 
// Reference to a run of variables: a tape pointer plus an offset and a size.
// Member functions are only declared here; their definitions live elsewhere.
406 struct SegmentRef {
// Tape the segment belongs to.
407  global *glob_ptr;
// NOTE(review): presumably the index of the first variable of the segment - confirm.
408  Index offset;
// Number of elements in the segment.
409  Index size;
410  Scalar *value_ptr();
411  Scalar *deriv_ptr();
412  SegmentRef();
413  SegmentRef(const Scalar *x);
414  SegmentRef(global *g, Index o, Index s);
415  SegmentRef(const ad_segment &x);
416  bool isNull();
417  void resize(ad_segment &pack, Index n);
418 };
419 
420 ad_segment pack(const ad_segment &x);
421 ad_segment unpack(const ad_segment &x);
422 
// Operator with one declared input and ScalarPack<SegmentRef>::size outputs
// (template arguments of the DynamicOperator base). Non-template members are
// only declared here; their definitions live elsewhere.
435 struct PackOp : global::DynamicOperator<1, ScalarPack<SegmentRef>::size> {
// Number of Scalar slots taken up by one SegmentRef.
437  static const Index K = ScalarPack<SegmentRef>::size;
// NOTE(review): presumably the length of the segment being packed - confirm.
439  Index n;
440  PackOp(const Index n);
442  void forward(ForwardArgs<Scalar> &args);
444  void forward(ForwardArgs<Replay> &args);
446  void reverse(ReverseArgs<Scalar> &args);
448  void reverse(ReverseArgs<Replay> &args);
449  const char *op_name();
451  static const bool allow_remap = false;
452  static const bool have_dependencies = true;
453  static const bool implicit_dependencies = true;
454  void dependencies(Args<> &args, Dependencies &dep) const;
455 
// Catch-all overloads: any other argument type is rejected with an assertion.
456  template <class T>
457  void forward(ForwardArgs<T> &args) {
458  TMBAD_ASSERT2(false, "PackOp: Invalid method!");
459  }
460  template <class T>
461  void reverse(ReverseArgs<T> &args) {
462  TMBAD_ASSERT2(false, "PackOp: Invalid method!");
463  }
464 };
465 
469  static const Index K = ScalarPack<SegmentRef>::size;
471  Index noutput;
472  UnpkOp(const Index n);
474  void forward(ForwardArgs<Scalar> &args);
475  static const bool add_forward_replay_copy = true;
477  void reverse(ReverseArgs<Scalar> &args);
479  void reverse(ReverseArgs<Replay> &args);
480  const char *op_name();
481 
483  static const bool allow_remap = false;
484  static const bool have_dependencies = true;
485  static const bool implicit_dependencies = true;
486  void dependencies(Args<> &args, Dependencies &dep) const;
487 
488  template <class T>
489  void forward(ForwardArgs<T> &args) {
490  TMBAD_ASSERT2(false, "UnpkOp: Invalid method!");
491  }
492  template <class T>
493  void reverse(ReverseArgs<T> &args) {
494  TMBAD_ASSERT2(false, "UnpkOp: Invalid method!");
495  }
496 };
497 
499 ad_segment pack(const ad_segment &x);
500 
502 ad_segment unpack(const ad_segment &x);
503 
505 template <class T>
506 ad_segment unpack(const std::vector<T> &x, Index j) {
507  Index K = ScalarPack<SegmentRef>::size;
508  ad_segment x_(x[j * K], K);
509  return unpack(x_);
510 }
511 Scalar *unpack(const std::vector<Scalar> &x, Index j);
512 
513 template <class T>
514 std::vector<T> repack(const std::vector<T> &x) {
515  Index K = ScalarPack<SegmentRef>::size;
516  size_t n = x.size() / K;
517  std::vector<T> y;
518  for (size_t j = 0; j < n; j++) {
519  ad_segment x_(x[j * K], K);
520  SegmentRef sr(x_);
521  ad_segment orig(sr.offset, sr.size);
522  ad_segment yj = pack(orig);
523  for (size_t i = 0; i < K; i++) y.push_back(yj[i]);
524  }
525  return y;
526 }
527 
528 std::vector<ad_aug> concat(const std::vector<ad_segment> &x);
529 
530 } // namespace TMBad
531 #endif // HAVE_VECTORIZE_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
Index noutput
Unpacked size.
Access input/output values and derivatives during a reverse pass. Write access granted for the input ...
Definition: global.hpp:311
diff --git a/TMBad_8cpp_source.html b/TMBad_8cpp_source.html index 82e84bf75..566f0b022 100644 --- a/TMBad_8cpp_source.html +++ b/TMBad_8cpp_source.html @@ -73,7 +73,7 @@
TMBad.cpp
-
1 // Autogenerated - do not edit by hand !
2 #include "TMBad.hpp"
3 namespace TMBad {
4 
// Default configuration: `compress` off, `index_remap` on.
5 SpJacFun_config::SpJacFun_config() : compress(false), index_remap(true) {}
6 } // namespace TMBad
7 // Autogenerated - do not edit by hand !
8 #include "ad_blas.hpp"
9 namespace TMBad {
10 
11 vmatrix matmul(const vmatrix &x, const vmatrix &y) {
12  vmatrix z(x.rows(), y.cols());
13  Map<vmatrix> zm(&z(0), z.rows(), z.cols());
14  matmul<false, false, false, false>(x, y, zm);
15  return z;
16 }
17 
18 dmatrix matmul(const dmatrix &x, const dmatrix &y) { return x * y; }
19 } // namespace TMBad
20 // Autogenerated - do not edit by hand !
21 #include "checkpoint.hpp"
22 namespace TMBad {
23 
24 bool ParametersChanged::operator()(const std::vector<Scalar> &x) {
25  bool change = (x != x_prev);
26  if (change) {
27  x_prev = x;
28  }
29  return change;
30 }
31 } // namespace TMBad
32 // Autogenerated - do not edit by hand !
33 #include "code_generator.hpp"
34 namespace TMBad {
35 
/**
 * Replace every occurrence of `oldStr` in `str` by `newStr`, in place.
 *
 * Scanning resumes after each inserted replacement, so text introduced by
 * `newStr` is never matched again.
 *
 * An empty `oldStr` is a no-op. Without that guard an empty pattern matches
 * at every position and the loop would never terminate.
 */
void searchReplace(std::string &str, const std::string &oldStr,
                   const std::string &newStr) {
  if (oldStr.empty()) return;
  std::string::size_type pos = 0u;
  while ((pos = str.find(oldStr, pos)) != std::string::npos) {
    str.replace(pos, oldStr.length(), newStr);
    // Skip past the replacement so it is not scanned again.
    pos += newStr.length();
  }
}
44 
45 std::string code_config::float_ptr() { return float_str + (gpu ? "**" : "*"); }
46 
47 std::string code_config::void_str() {
48  return (gpu ? "__device__ void" : "extern \"C\" void");
49 }
50 
51 void code_config::init_code() {
52  if (gpu) {
53  *cout << indent << "int idx = threadIdx.x;" << std::endl;
54  }
55 }
56 
57 void code_config::write_header_comment() {
58  if (header_comment.length() > 0) *cout << header_comment << std::endl;
59 }
60 
// Default code generation settings: asm comments on, gpu mode on,
// the stringified TMBAD_SCALAR_TYPE as float type, output sent to Rcout.
61 code_config::code_config()
62  : asm_comments(true),
63  gpu(true),
64  indent(" "),
65  header_comment("// Autogenerated - do not edit by hand !"),
66  float_str(xstringify(TMBAD_SCALAR_TYPE)),
67  cout(&Rcout) {}
68 
69 void write_common(std::ostringstream &buffer, code_config cfg, size_t node) {
70  std::ostream &cout = *cfg.cout;
71  using std::endl;
72  using std::left;
73  using std::setw;
74  std::string indent = cfg.indent;
75  if (cfg.asm_comments)
76  cout << indent << "asm(\"// Node: " << node << "\");" << endl;
77  bool empty_buffer = (buffer.tellp() == 0);
78  if (!empty_buffer) {
79  std::string str = buffer.str();
80  if (cfg.gpu) {
81  std::string pattern = "]";
82  std::string replace = "][idx]";
83  searchReplace(str, pattern, replace);
84  }
85  searchReplace(str, ";v", "; v");
86  searchReplace(str, ";d", "; d");
87  cout << indent << str << endl;
88  }
89 }
90 
91 void write_forward(global &glob, code_config cfg) {
92  using std::endl;
93  using std::left;
94  using std::setw;
95  std::ostream &cout = *cfg.cout;
96  cfg.write_header_comment();
97  cout << cfg.void_str() << " forward(" << cfg.float_ptr() << " v) {" << endl;
98  cfg.init_code();
99  ForwardArgs<Writer> args(glob.inputs, glob.values);
100  for (size_t i = 0; i < glob.opstack.size(); i++) {
101  std::ostringstream buffer;
102  Writer::cout = &buffer;
103  glob.opstack[i]->forward(args);
104  write_common(buffer, cfg, i);
105  glob.opstack[i]->increment(args.ptr);
106  }
107  cout << "}" << endl;
108 }
109 
110 void write_reverse(global &glob, code_config cfg) {
111  using std::endl;
112  using std::left;
113  using std::setw;
114  std::ostream &cout = *cfg.cout;
115  cfg.write_header_comment();
116  cout << cfg.void_str() << " reverse(" << cfg.float_ptr() << " v, "
117  << cfg.float_ptr() << " d) {" << endl;
118  cfg.init_code();
119  ReverseArgs<Writer> args(glob.inputs, glob.values);
120  for (size_t i = glob.opstack.size(); i > 0;) {
121  i--;
122  glob.opstack[i]->decrement(args.ptr);
123  std::ostringstream buffer;
124  Writer::cout = &buffer;
125  glob.opstack[i]->reverse(args);
126  write_common(buffer, cfg, i);
127  }
128  cout << "}" << endl;
129 }
130 
131 void write_all(global glob, code_config cfg) {
132  using std::endl;
133  using std::left;
134  using std::setw;
135  std::ostream &cout = *cfg.cout;
136  cout << "#include \"global.hpp\"" << endl;
137  cout << "#include \"ad_blas.hpp\"" << endl;
138  write_forward(glob, cfg);
139  write_reverse(glob, cfg);
140  cout << "int main() {}" << endl;
141 }
142 } // namespace TMBad
143 #ifndef _WIN32
144 // Autogenerated - do not edit by hand !
145 #include "compile.hpp"
146 namespace TMBad {
147 
148 void compile(global &glob, code_config cfg) {
149  cfg.gpu = false;
150  cfg.asm_comments = false;
151  std::ofstream file;
152  file.open("tmp.cpp");
153  cfg.cout = &file;
154 
155  *cfg.cout << "#include <cmath>" << std::endl;
156  *cfg.cout
157  << "template<class T>T sign(const T &x) { return (x > 0) - (x < 0); }"
158  << std::endl;
159 
160  write_forward(glob, cfg);
161 
162  write_reverse(glob, cfg);
163 
164  int out = system("g++ -O3 -g tmp.cpp -o tmp.so -shared -fPIC");
165  if (out != 0) {
166  }
167 
168  void *handle = dlopen("./tmp.so", RTLD_NOW);
169  if (handle != NULL) {
170  Rcout << "Loading compiled code!" << std::endl;
171  glob.forward_compiled =
172  reinterpret_cast<void (*)(Scalar *)>(dlsym(handle, "forward"));
173  glob.reverse_compiled = reinterpret_cast<void (*)(Scalar *, Scalar *)>(
174  dlsym(handle, "reverse"));
175  }
176 }
177 } // namespace TMBad
178 #endif
179 // Autogenerated - do not edit by hand !
180 #include "compression.hpp"
181 namespace TMBad {
182 
183 std::ostream &operator<<(std::ostream &os, const period &x) {
184  os << "begin: " << x.begin;
185  os << " size: " << x.size;
186  os << " rep: " << x.rep;
187  return os;
188 }
189 
// Split the operator period `p` into sub-periods.
// The inputs of the period are viewed as a matrix with one column per
// repetition; each row's differences are searched for periodic runs, and
// every boundary of such a run marks a place where `p` is cut.
// Returns the resulting periods in order; each has the same `size` as `p`.
190 std::vector<period> split_period(global *glob, period p,
191  size_t max_period_size) {
192  typedef std::ptrdiff_t ptrdiff_t;
193  glob->subgraph_cache_ptr();
194 
// Position in glob->inputs of the first input of the period's first operator.
195  size_t offset = glob->subgraph_ptr[p.begin].first;
196 
// Rows: total number of inputs of one repetition.
197  size_t nrow = 0;
198  for (size_t i = 0; i < p.size; i++) {
199  nrow += glob->opstack[p.begin + i]->input_size();
200  }
201 
// Columns: number of repetitions.
202  size_t ncol = p.rep;
203 
204  matrix_view<Index> x(&(glob->inputs[offset]), nrow, ncol);
205 
// marks[j] == true requests a cut between repetition j and j + 1.
// NOTE(review): `ncol - 1` wraps around if p.rep == 0 - presumably callers
// only pass periods with rep >= 1; confirm.
206  std::vector<bool> marks(ncol - 1, false);
207 
208  for (size_t i = 0; i < nrow; i++) {
209  std::vector<period> pd =
210  periodic<ptrdiff_t>(x.row_diff<ptrdiff_t>(i), max_period_size)
211  .find_all();
212 
// Mark the position before each detected run and the position at its end.
213  for (size_t j = 0; j < pd.size(); j++) {
214  if (pd[j].begin > 0) {
215  marks[pd[j].begin - 1] = true;
216  }
217  size_t end = pd[j].begin + pd[j].size * pd[j].rep;
218  if (end < marks.size()) marks[end] = true;
219  }
220  }
221 
// Build the answer: start with one repetition, then either open a new
// period at a mark or extend the current one.
222  std::vector<period> ans;
223  p.rep = 1;
224  ans.push_back(p);
225  for (size_t j = 0; j < marks.size(); j++) {
226  if (marks[j]) {
227  period pnew = p;
228  pnew.begin = p.begin + (j + 1) * p.size;
229  pnew.rep = 1;
230  ans.push_back(pnew);
231  } else {
232  ans.back().rep++;
233  }
234  }
235 
236  return ans;
237 }
238 
239 size_t compressed_input::input_size() const { return n; }
240 
241 void compressed_input::update_increment_pattern() const {
242  for (size_t i = 0; i < (size_t)np; i++)
243  increment_pattern[which_periodic[i]] =
244  period_data[period_offsets[i] + counter % period_sizes[i]];
245 }
246 
247 void compressed_input::increment(Args<> &args) const {
248  if (np) {
249  update_increment_pattern();
250  counter++;
251  }
252  for (size_t i = 0; i < n; i++) inputs[i] += increment_pattern[i];
253  args.ptr.first = 0;
254 }
255 
256 void compressed_input::decrement(Args<> &args) const {
257  args.ptr.first = input_size();
258  for (size_t i = 0; i < n; i++) inputs[i] -= increment_pattern[i];
259  if (np) {
260  counter--;
261  update_increment_pattern();
262  }
263 }
264 
265 void compressed_input::forward_init(Args<> &args) const {
266  counter = 0;
267  inputs.resize(input_size());
268  for (size_t i = 0; i < inputs.size(); i++) inputs[i] = args.input(i);
269  args.inputs = inputs.data();
270  args.ptr.first = 0;
271 }
272 
273 void compressed_input::reverse_init(Args<> &args) {
274  inputs.resize(input_size());
275  for (size_t i = 0; i < inputs.size(); i++)
276  inputs[i] = args.input(i) + input_diff[i];
277 
278  args.inputs = inputs.data();
279  args.ptr.first = 0;
280  args.ptr.second += m * nrep;
281  counter = nrep - 1;
282  update_increment_pattern();
283  args.ptr.first = input_size();
284 }
285 
286 void compressed_input::dependencies_intervals(Args<> &args,
287  std::vector<Index> &lower,
288  std::vector<Index> &upper) const {
289  forward_init(args);
290  lower = inputs;
291  upper = inputs;
292  for (size_t i = 0; i < nrep; i++) {
293  for (size_t j = 0; j < inputs.size(); j++) {
294  if (inputs[j] < lower[j]) lower[j] = inputs[j];
295  if (inputs[j] > upper[j]) upper[j] = inputs[j];
296  }
297  increment(args);
298  }
299 }
300 
301 bool compressed_input::test_period(std::vector<ptrdiff_t> &x, size_t p) {
302  for (size_t j = 0; j < x.size(); j++) {
303  if (x[j] != x[j % p]) return false;
304  }
305  return true;
306 }
307 
308 size_t compressed_input::find_shortest(std::vector<ptrdiff_t> &x) {
309  for (size_t p = 1; p < max_period_size; p++) {
310  if (test_period(x, p)) return p;
311  }
312  return x.size();
313 }
314 
// Default constructor: initializes nothing explicitly.
315 compressed_input::compressed_input() {}
316 
// Build the compressed description of a block of inputs.
// The `nrow * ncol` entries of `x` starting at `offset` are viewed as a
// matrix with one row per input and one column per repetition. `m` is stored
// as the number of outputs per repetition.
317 compressed_input::compressed_input(std::vector<Index> &x, size_t offset,
318  size_t nrow, size_t m, size_t ncol,
319  size_t max_period_size)
320  : n(nrow), m(m), nrep(ncol), counter(0), max_period_size(max_period_size) {
321  matrix_view<Index> xm(&x[offset], nrow, ncol);
322 
323  for (size_t i = 0; i < nrow; i++) {
// Differences of input i along the repetitions.
324  std::vector<ptrdiff_t> rd = xm.row_diff<ptrdiff_t>(i);
325 
326  size_t p = find_shortest(rd);
327 
// NOTE(review): rd[0] requires at least one difference, i.e. presumably
// ncol >= 2 - confirm against callers.
328  increment_pattern.push_back(rd[0]);
// p == 1 means a constant increment; anything else is stored as a period.
329  if (p != 1) {
330  which_periodic.push_back(i);
331  period_sizes.push_back(p);
332 
// Reuse an identical run already stored in period_data when there is one.
333  size_t pos = std::search(period_data.begin(), period_data.end(),
334  rd.begin(), rd.begin() + p) -
335  period_data.begin();
336  if (pos < period_data.size()) {
337  period_offsets.push_back(pos);
338  } else {
339  period_offsets.push_back(period_data.size());
340  period_data.insert(period_data.end(), rd.begin(), rd.begin() + p);
341  }
342  }
343  }
344 
345  np = which_periodic.size();
346 
// Starting from all-zero inputs, apply nrep increments; what results is the
// total shift of each input over all repetitions.
347  input_diff.resize(n, 0);
348  Args<> args(input_diff);
349  forward_init(args);
350  for (size_t i = 0; i < nrep; i++) {
351  increment(args);
352  }
353  input_diff = inputs;
354 }
355 
356 StackOp::StackOp(global *glob, period p, IndexPair ptr,
357  size_t max_period_size) {
358  opstack.resize(p.size);
359  size_t n = 0, m = 0;
360  for (size_t i = 0; i < p.size; i++) {
361  opstack[i] = glob->opstack[p.begin + i]->copy();
362  n += opstack[i]->input_size();
363  m += opstack[i]->output_size();
364  }
365  ci = compressed_input(glob->inputs, ptr.first, n, m, p.rep, max_period_size);
366 }
367 
// Copy constructor: copies the operator stack and the compressed input.
368 StackOp::StackOp(const StackOp &x) : opstack(x.opstack), ci(x.ci) {}
369 
370 void StackOp::print(global::print_config cfg) {
371  std::vector<const char *> tmp(opstack.size());
372  for (size_t i = 0; i < opstack.size(); i++) tmp[i] = opstack[i]->op_name();
373  Rcout << cfg.prefix << " opstack = " << tmp << "\n";
374 
375  Rcout << cfg.prefix << " "
376  << "nrep"
377  << " = " << ci.nrep << "\n";
378  ;
379  Rcout << cfg.prefix << " "
380  << "increment_pattern"
381  << " = " << ci.increment_pattern << "\n";
382  ;
383  if (ci.which_periodic.size() > 0) {
384  Rcout << cfg.prefix << " "
385  << "which_periodic"
386  << " = " << ci.which_periodic << "\n";
387  ;
388  Rcout << cfg.prefix << " "
389  << "period_sizes"
390  << " = " << ci.period_sizes << "\n";
391  ;
392  Rcout << cfg.prefix << " "
393  << "period_offsets"
394  << " = " << ci.period_offsets << "\n";
395  ;
396  Rcout << cfg.prefix << " "
397  << "period_data"
398  << " = " << ci.period_data << "\n";
399  ;
400  }
401 
402  Rcout << "\n";
403 }
404 
405 Index StackOp::input_size() const { return ci.n; }
406 
407 Index StackOp::output_size() const { return ci.m * ci.nrep; }
408 
// Source-code generating forward pass: writes the text of a C `for` loop that
// runs the stacked operators `nrep` times.
// In the emitted text, `i` / `o` are the input / output index arrays, `ip`
// the increment pattern, and `wp`, `ps`, `po`, `pd` the periodic-input tables.
409 void StackOp::forward(ForwardArgs<Writer> &args) {
410  size_t n = ci.n, m = ci.m, nrep = ci.nrep;
411  std::vector<Index> inputs(n);
412  for (size_t i = 0; i < (size_t)n; i++) inputs[i] = args.input(i);
413  std::vector<Index> outputs(m);
414  for (size_t i = 0; i < (size_t)m; i++) outputs[i] = args.output(i);
415  Writer w;
416  size_t np = ci.which_periodic.size();
417  size_t sp = ci.period_data.size();
// Loop header: the counter plus all index tables as loop-local arrays.
418  w << "for (int count = 0, ";
419  if (n > 0) {
420  w << "i[" << n << "]=" << inputs << ", "
421  << "ip[" << n << "]=" << ci.increment_pattern << ", ";
422  }
423  if (np > 0) {
424  w << "wp[" << np << "]=" << ci.which_periodic << ", "
425  << "ps[" << np << "]=" << ci.period_sizes << ", "
426  << "po[" << np << "]=" << ci.period_offsets << ", "
427  << "pd[" << sp << "]=" << ci.period_data << ", ";
428  }
429  w << "o[" << m << "]=" << outputs << "; "
430  << "count < " << nrep << "; count++) {\n";
431 
// Loop body: the operators of one repetition, written via a copy of `args`
// switched to indirect mode.
432  w << " ";
433  ForwardArgs<Writer> args_cpy = args;
434  args_cpy.set_indirect();
435  for (size_t k = 0; k < opstack.size(); k++) {
436  opstack[k]->forward_incr(args_cpy);
437  }
438  w << "\n";
439 
// Refresh the increments of the periodic inputs for the current count.
440  if (np > 0) {
441  w << " ";
442  for (size_t k = 0; k < np; k++)
443  w << "ip[wp[" << k << "]] = pd[po[" << k << "] + count % ps[" << k
444  << "]]; ";
445  w << "\n";
446  }
// Advance the input indices by the increment pattern.
447  if (n > 0) {
448  w << " ";
449  for (size_t k = 0; k < n; k++) w << "i[" << k << "] += ip[" << k << "]; ";
450  w << "\n";
451  }
// Advance every output index by the number of outputs per repetition.
452  w << " ";
453  for (size_t k = 0; k < m; k++) w << "o[" << k << "] += " << m << "; ";
454  w << "\n";
455 
456  w << " ";
457  w << "}";
458 }
459 
// Source-code generating reverse pass: writes the text of a C `for` loop that
// runs the stacked operators in reverse, counting down from `nrep`.
// In the emitted text, `i` / `o` are the input / output index arrays, `ip`
// the increment pattern, and `wp`, `ps`, `po`, `pd` the periodic-input tables.
460 void StackOp::reverse(ReverseArgs<Writer> &args) {
461  size_t n = ci.n, m = ci.m, nrep = ci.nrep;
// Start from the input indices shifted by the total displacement input_diff.
462  std::vector<ptrdiff_t> inputs(input_size());
463  for (size_t i = 0; i < inputs.size(); i++) {
464  ptrdiff_t tmp;
// NOTE(review): presumably input_diff is unsigned and this comparison detects
// a wrapped-around (negative) displacement, which is then converted to a
// signed negative value - confirm against the declaration of input_diff.
465  if (-ci.input_diff[i] < ci.input_diff[i]) {
466  tmp = -((ptrdiff_t)-ci.input_diff[i]);
467  } else {
468  tmp = ci.input_diff[i];
469  }
470  inputs[i] = args.input(i) + tmp;
471  }
// Output indices start one full block (m * nrep) past their first position.
472  std::vector<Index> outputs(ci.m);
473  for (size_t i = 0; i < (size_t)ci.m; i++)
474  outputs[i] = args.output(i) + ci.m * ci.nrep;
475  Writer w;
476  size_t np = ci.which_periodic.size();
477  size_t sp = ci.period_data.size();
// Loop header: the counter plus all index tables as loop-local arrays.
478  w << "for (int count = " << nrep << ", ";
479  if (n > 0) {
480  w << "i[" << n << "]=" << inputs << ", "
481  << "ip[" << n << "]=" << ci.increment_pattern << ", ";
482  }
483  if (np > 0) {
484  w << "wp[" << np << "]=" << ci.which_periodic << ", "
485  << "ps[" << np << "]=" << ci.period_sizes << ", "
486  << "po[" << np << "]=" << ci.period_offsets << ", "
487  << "pd[" << sp << "]=" << ci.period_data << ", ";
488  }
489  w << "o[" << m << "]=" << outputs << "; "
490  << "count > 0 ; ) {\n";
491 
// The counter is decreased at the top of the emitted loop body.
492  w << " ";
493  w << "count--;\n";
// Refresh the increments of the periodic inputs for the current count.
494  if (np > 0) {
495  w << " ";
496  for (size_t k = 0; k < np; k++)
497  w << "ip[wp[" << k << "]] = pd[po[" << k << "] + count % ps[" << k
498  << "]]; ";
499  w << "\n";
500  }
// Step the input indices back by the increment pattern.
501  if (n > 0) {
502  w << " ";
503  for (size_t k = 0; k < n; k++) w << "i[" << k << "] -= ip[" << k << "]; ";
504  w << "\n";
505  }
// Step every output index back by the number of outputs per repetition.
506  w << " ";
507  for (size_t k = 0; k < m; k++) w << "o[" << k << "] -= " << m << "; ";
508  w << "\n";
509 
510  w << " ";
511 
// Operators of one repetition, last to first, written via a copy of `args`
// in indirect mode whose pointers start at the end of the index arrays.
512  ReverseArgs<Writer> args_cpy = args;
513  args_cpy.set_indirect();
514  args_cpy.ptr.first = ci.n;
515  args_cpy.ptr.second = ci.m;
516  for (size_t k = opstack.size(); k > 0;) {
517  k--;
518  opstack[k]->reverse_decr(args_cpy);
519  }
520  w << "\n";
521 
522  w << " ";
523  w << "}";
524 }
525 
526 void StackOp::dependencies(Args<> args, Dependencies &dep) const {
527  std::vector<Index> lower;
528  std::vector<Index> upper;
529  ci.dependencies_intervals(args, lower, upper);
530  for (size_t i = 0; i < lower.size(); i++) {
531  dep.add_interval(lower[i], upper[i]);
532  }
533 }
534 
535 const char *StackOp::op_name() { return "StackOp"; }
536 
539  cfg.strong_inv = false;
540  cfg.strong_const = false;
541  cfg.strong_output = false;
542  cfg.reduce = false;
543  cfg.deterministic = false;
544  std::vector<hash_t> h = glob.hash_sweep(cfg);
545  std::vector<Index> remap = radix::first_occurance<Index>(h);
546 
547  TMBAD_ASSERT(all_allow_remap(glob));
548 
549  Args<> args(glob.inputs);
550  for (size_t i = 0; i < glob.opstack.size(); i++) {
551  Dependencies dep;
552  glob.opstack[i]->dependencies(args, dep);
553 
554  Index var = args.ptr.second;
555  toposort_remap<Index> fb(remap, var);
556  dep.apply(fb);
557  glob.opstack[i]->increment(args.ptr);
558  }
559 
560  std::vector<Index> ord = radix::order<Index>(remap);
561  std::vector<Index> v2o = glob.var2op();
562  glob.subgraph_seq = subset(v2o, ord);
563 
564  glob = glob.extract_sub();
565 }
566 
568  std::vector<Index> remap(glob.values.size(), Index(-1));
569  Args<> args(glob.inputs);
570  for (size_t i = 0; i < glob.opstack.size(); i++) {
571  Dependencies dep;
572  glob.opstack[i]->dependencies(args, dep);
573  sort_unique_inplace(dep);
574  Index var = args.ptr.second;
575  temporaries_remap<Index> fb(remap, var);
576  dep.apply(fb);
577  glob.opstack[i]->increment(args.ptr);
578  }
579 
580  for (size_t i = remap.size(); i > 0;) {
581  i--;
582  if (remap[i] == Index(-1))
583  remap[i] = i;
584  else
585  remap[i] = remap[remap[i]];
586  }
587 
588  std::vector<Index> ord = radix::order<Index>(remap);
589  std::vector<Index> v2o = glob.var2op();
590  glob.subgraph_seq = subset(v2o, ord);
591 
592  glob = glob.extract_sub();
593 }
594 
596  std::vector<bool> visited(glob.opstack.size(), false);
597  std::vector<Index> v2o = glob.var2op();
598  std::vector<Index> stack;
599  std::vector<Index> result;
600  Args<> args(glob.inputs);
601  glob.subgraph_cache_ptr();
602  for (size_t k = 0; k < glob.dep_index.size(); k++) {
603  Index dep_var = glob.dep_index[k];
604  Index i = v2o[dep_var];
605 
606  stack.push_back(i);
607  visited[i] = true;
608  while (stack.size() > 0) {
609  Index i = stack.back();
610  args.ptr = glob.subgraph_ptr[i];
611  Dependencies dep;
612  glob.opstack[i]->dependencies(args, dep);
613  dfs_add_to_stack<Index> add_to_stack(stack, visited, v2o);
614  size_t before = stack.size();
615  dep.apply(add_to_stack);
616  size_t after = stack.size();
617  if (before == after) {
618  result.push_back(i);
619  stack.pop_back();
620  }
621  }
622  }
623 
624  glob.subgraph_seq = result;
625  glob = glob.extract_sub();
626 
627  glob.shrink_to_fit();
628 }
629 
// Compress the operator stack of `glob` in place: repeated runs of operators
// (periods) are located and each one is replaced by a single StackOp.
630 void compress(global &glob, size_t max_period_size) {
631  size_t min_period_rep = TMBAD_MIN_PERIOD_REP;
632  periodic<global::OperatorPure *> p(glob.opstack, max_period_size,
633  min_period_rep);
634  std::vector<period> periods = p.find_all();
635 
// Refine each period by splitting it according to its input pattern.
636  std::vector<period> periods_expand;
637  for (size_t i = 0; i < periods.size(); i++) {
638  std::vector<period> tmp = split_period(&glob, periods[i], max_period_size);
639 
// A split into more than 10 pieces is discarded in favour of the original.
640  if (tmp.size() > 10) {
641  tmp.resize(0);
642  tmp.push_back(periods[i]);
643  }
644 
// Only periods that actually repeat are kept.
645  for (size_t j = 0; j < tmp.size(); j++) {
646  if (tmp[j].rep > 1) periods_expand.push_back(tmp[j]);
647  }
648  }
649 
650  std::swap(periods, periods_expand);
651  OperatorPure *null_op = get_glob()->getOperator<global::NullOp>();
// `ptr` tracks the input/output position of operator `k` while walking
// forward through the stack.
652  IndexPair ptr(0, 0);
653  Index k = 0;
654  for (size_t i = 0; i < periods.size(); i++) {
655  period p = periods[i];
656  TMBAD_ASSERT(p.rep >= 1);
657  while (k < p.begin) {
658  glob.opstack[k]->increment(ptr);
659  k++;
660  }
661 
662  OperatorPure *pOp =
663  get_glob()->getOperator<StackOp>(&glob, p, ptr, max_period_size);
// Release every operator of the period and count the inputs they consumed.
664  Index ninp = 0;
665  for (size_t j = 0; j < p.size * p.rep; j++) {
666  ninp += glob.opstack[p.begin + j]->input_size();
667  glob.opstack[p.begin + j]->deallocate();
668  glob.opstack[p.begin + j] = null_op;
669  }
// Slot 0 of the period holds the StackOp; slot 1 holds a NullOp2 built with
// the number of inputs the StackOp itself does not account for.
670  glob.opstack[p.begin] = pOp;
671  ninp -= pOp->input_size();
672  glob.opstack[p.begin + 1] =
673  get_glob()->getOperator<global::NullOp2>(ninp, 0);
674  }
675 
// Rebuild the tape keeping every variable marked.
676  std::vector<bool> marks(glob.values.size(), true);
677  glob.extract_sub_inplace(marks);
678  glob.shrink_to_fit();
679 }
680 } // namespace TMBad
681 // Autogenerated - do not edit by hand !
682 #include "global.hpp"
683 namespace TMBad {
684 
685 global *global_ptr_data[TMBAD_MAX_NUM_THREADS] = {NULL};
686 global **global_ptr = global_ptr_data;
687 std::ostream *Writer::cout = 0;
688 bool global::fuse = 0;
689 
690 global *get_glob() { return global_ptr[TMBAD_THREAD_NUM]; }
691 
692 Dependencies::Dependencies() {}
693 
694 void Dependencies::clear() {
695  this->resize(0);
696  I.resize(0);
697 }
698 
699 void Dependencies::add_interval(Index a, Index b) {
700  I.push_back(std::pair<Index, Index>(a, b));
701 }
702 
703 void Dependencies::add_segment(Index start, Index size) {
704  if (size > 0) add_interval(start, start + size - 1);
705 }
706 
707 void Dependencies::monotone_transform_inplace(const std::vector<Index> &x) {
708  for (size_t i = 0; i < this->size(); i++) (*this)[i] = x[(*this)[i]];
709  for (size_t i = 0; i < I.size(); i++) {
710  I[i].first = x[I[i].first];
711  I[i].second = x[I[i].second];
712  }
713 }
714 
715 bool Dependencies::any(const std::vector<bool> &x) const {
716  for (size_t i = 0; i < this->size(); i++)
717  if (x[(*this)[i]]) return true;
718  for (size_t i = 0; i < I.size(); i++) {
719  for (Index j = I[i].first; j <= I[i].second; j++) {
720  if (x[j]) return true;
721  }
722  }
723  return false;
724 }
725 
726 std::string tostr(const Index &x) {
727  std::ostringstream strs;
728  strs << x;
729  return strs.str();
730 }
731 
732 std::string tostr(const Scalar &x) {
733  std::ostringstream strs;
734  strs << x;
735  return strs.str();
736 }
737 
738 Writer::Writer(std::string str) : std::string(str) {}
739 
740 Writer::Writer(Scalar x) : std::string(tostr(x)) {}
741 
742 Writer::Writer() {}
743 
744 std::string Writer::p(std::string x) { return "(" + x + ")"; }
745 
746 Writer Writer::operator+(const Writer &other) {
747  return p(*this + " + " + other);
748 }
749 
750 Writer Writer::operator-(const Writer &other) {
751  return p(*this + " - " + other);
752 }
753 
754 Writer Writer::operator-() { return " - " + *this; }
755 
756 Writer Writer::operator*(const Writer &other) { return *this + " * " + other; }
757 
758 Writer Writer::operator/(const Writer &other) { return *this + " / " + other; }
759 
760 Writer Writer::operator*(const Scalar &other) {
761  return *this + "*" + tostr(other);
762 }
763 
764 Writer Writer::operator+(const Scalar &other) {
765  return p(*this + "+" + tostr(other));
766 }
767 
768 void Writer::operator=(const Writer &other) {
769  *cout << *this + " = " + other << ";";
770 }
771 
772 void Writer::operator+=(const Writer &other) {
773  *cout << *this + " += " + other << ";";
774 }
775 
776 void Writer::operator-=(const Writer &other) {
777  *cout << *this + " -= " + other << ";";
778 }
779 
780 void Writer::operator*=(const Writer &other) {
781  *cout << *this + " *= " + other << ";";
782 }
783 
784 void Writer::operator/=(const Writer &other) {
785  *cout << *this + " /= " + other << ";";
786 }
787 
788 Position::Position(Index node, Index first, Index second)
789  : node(node), ptr(first, second) {}
790 
791 Position::Position() : node(0), ptr(0, 0) {}
792 
793 bool Position::operator<(const Position &other) const {
794  return this->node < other.node;
795 }
796 
797 graph::graph() {}
798 
799 size_t graph::num_neighbors(Index node) { return p[node + 1] - p[node]; }
800 
801 Index *graph::neighbors(Index node) { return &(j[p[node]]); }
802 
803 bool graph::empty() { return p.size() == 0; }
804 
805 size_t graph::num_nodes() { return (empty() ? 0 : p.size() - 1); }
806 
807 void graph::print() {
808  for (size_t node = 0; node < num_nodes(); node++) {
809  Rcout << node << ": ";
810  for (size_t i = 0; i < num_neighbors(node); i++) {
811  Rcout << " " << neighbors(node)[i];
812  }
813  Rcout << "\n";
814  }
815 }
816 
817 std::vector<Index> graph::rowcounts() {
818  std::vector<Index> ans(num_nodes());
819  for (size_t i = 0; i < ans.size(); i++) ans[i] = num_neighbors(i);
820  return ans;
821 }
822 
823 std::vector<Index> graph::colcounts() {
824  std::vector<Index> ans(num_nodes());
825  for (size_t i = 0; i < j.size(); i++) ans[j[i]]++;
826  return ans;
827 }
828 
829 void graph::bfs(const std::vector<Index> &start, std::vector<bool> &visited,
830  std::vector<Index> &result) {
831  for (size_t i = 0; i < start.size(); i++) {
832  Index node = start[i];
833  for (size_t j_ = 0; j_ < num_neighbors(node); j_++) {
834  Index k = neighbors(node)[j_];
835  if (!visited[k]) {
836  result.push_back(k);
837  visited[k] = true;
838  }
839  }
840  }
841 }
842 
843 void graph::search(std::vector<Index> &start, bool sort_input,
844  bool sort_output) {
845  if (mark.size() == 0) mark.resize(num_nodes(), false);
846 
847  search(start, mark, sort_input, sort_output);
848 
849  for (size_t i = 0; i < start.size(); i++) mark[start[i]] = false;
850 }
851 
852 void graph::search(std::vector<Index> &start, std::vector<bool> &visited,
853  bool sort_input, bool sort_output) {
854  if (sort_input) sort_unique_inplace(start);
855 
856  for (size_t i = 0; i < start.size(); i++) visited[start[i]] = true;
857 
858  bfs(start, visited, start);
859 
860  if (sort_output) sort_inplace(start);
861 }
862 
863 std::vector<Index> graph::boundary(const std::vector<Index> &subgraph) {
864  if (mark.size() == 0) mark.resize(num_nodes(), false);
865 
866  std::vector<Index> boundary;
867 
868  for (size_t i = 0; i < subgraph.size(); i++) mark[subgraph[i]] = true;
869 
870  bfs(subgraph, mark, boundary);
871 
872  for (size_t i = 0; i < subgraph.size(); i++) mark[subgraph[i]] = false;
873  for (size_t i = 0; i < boundary.size(); i++) mark[boundary[i]] = false;
874 
875  return boundary;
876 }
877 
878 graph::graph(size_t num_nodes, const std::vector<IndexPair> &edges) {
879  std::vector<IndexPair>::const_iterator it;
880  std::vector<Index> row_counts(num_nodes, 0);
881  for (it = edges.begin(); it != edges.end(); it++) {
882  row_counts[it->first]++;
883  }
884 
885  p.resize(num_nodes + 1);
886  p[0] = 0;
887  for (size_t i = 0; i < num_nodes; i++) {
888  p[i + 1] = p[i] + row_counts[i];
889  }
890 
891  std::vector<Index> k(p);
892  j.resize(edges.size());
893  for (it = edges.begin(); it != edges.end(); it++) {
894  j[k[it->first]++] = it->second;
895  }
896 }
897 
898 op_info::op_info() : code(0) {
899  static_assert(sizeof(IntRep) * 8 >= op_flag_count,
900  "'IntRep' not wide enough!");
901 }
902 
903 op_info::op_info(op_flag f) : code(1 << f) {}
904 
905 bool op_info::test(op_flag f) const { return code & 1 << f; }
906 
907 op_info &op_info::operator|=(const op_info &other) {
908  code |= other.code;
909  return *this;
910 }
911 
912 op_info &op_info::operator&=(const op_info &other) {
913  code &= other.code;
914  return *this;
915 }
916 
917 global::operation_stack::operation_stack() {}
918 
919 global::operation_stack::operation_stack(const operation_stack &other) {
920  (*this).copy_from(other);
921 }
922 
923 void global::operation_stack::push_back(OperatorPure *x) {
924  Base::push_back(x);
925 
926  any |= x->info();
927 }
928 
929 operation_stack &global::operation_stack::operator=(
930  const operation_stack &other) {
931  if (this != &other) {
932  (*this).clear();
933  (*this).copy_from(other);
934  }
935  return *this;
936 }
937 
938 global::operation_stack::~operation_stack() { (*this).clear(); }
939 
940 void global::operation_stack::clear() {
941  if (any.test(op_info::dynamic)) {
942  for (size_t i = 0; i < (*this).size(); i++) (*this)[i]->deallocate();
943  }
944  (*this).resize(0);
945 }
946 
947 void global::operation_stack::copy_from(const operation_stack &other) {
948  if (other.any.test(op_info::dynamic)) {
949  for (size_t i = 0; i < other.size(); i++) Base::push_back(other[i]->copy());
950  } else {
951  Base::operator=(other);
952  }
953  this->any = other.any;
954 }
955 
956 global::global()
957  : forward_compiled(NULL),
958  reverse_compiled(NULL),
959  parent_glob(NULL),
960  in_use(false) {}
961 
962 void global::clear() {
963  values.resize(0);
964  derivs.resize(0);
965  inputs.resize(0);
966  inv_index.resize(0);
967  dep_index.resize(0);
968  subgraph_ptr.resize(0);
969  subgraph_seq.resize(0);
970  opstack.clear();
971 }
972 
973 void global::shrink_to_fit(double tol) {
974  std::vector<Scalar>().swap(derivs);
975  std::vector<IndexPair>().swap(subgraph_ptr);
976  if (values.size() < tol * values.capacity())
977  std::vector<Scalar>(values).swap(values);
978  if (inputs.size() < tol * inputs.capacity())
979  std::vector<Index>(inputs).swap(inputs);
980  if (opstack.size() < tol * opstack.capacity())
981  std::vector<OperatorPure *>(opstack).swap(opstack);
982 }
983 
984 void global::clear_deriv(Position start) {
985  derivs.resize(values.size());
986  std::fill(derivs.begin() + start.ptr.second, derivs.end(), 0);
987 }
988 
989 Scalar &global::value_inv(Index i) { return values[inv_index[i]]; }
990 
991 Scalar &global::deriv_inv(Index i) { return derivs[inv_index[i]]; }
992 
993 Scalar &global::value_dep(Index i) { return values[dep_index[i]]; }
994 
995 Scalar &global::deriv_dep(Index i) { return derivs[dep_index[i]]; }
996 
997 Position global::begin() { return Position(0, 0, 0); }
998 
999 Position global::end() {
1000  return Position(opstack.size(), inputs.size(), values.size());
1001 }
1002 
1003 CONSTEXPR bool global::no_filter::operator[](size_t i) const { return true; }
1004 
1005 void global::forward(Position start) {
1006  if (forward_compiled != NULL) {
1007  forward_compiled(values.data());
1008  return;
1009  }
1010  ForwardArgs<Scalar> args(inputs, values, this);
1011  args.ptr = start.ptr;
1012  forward_loop(args, start.node);
1013 }
1014 
1015 void global::reverse(Position start) {
1016  if (reverse_compiled != NULL) {
1017  reverse_compiled(values.data(), derivs.data());
1018  return;
1019  }
1020  ReverseArgs<Scalar> args(inputs, values, derivs, this);
1021  reverse_loop(args, start.node);
1022 }
1023 
1024 void global::forward_sub() {
1025  ForwardArgs<Scalar> args(inputs, values, this);
1026  forward_loop_subgraph(args);
1027 }
1028 
1029 void global::reverse_sub() {
1030  ReverseArgs<Scalar> args(inputs, values, derivs, this);
1031  reverse_loop_subgraph(args);
1032 }
1033 
1034 void global::forward(std::vector<bool> &marks) {
1035  intervals<Index> marked_intervals;
1036  ForwardArgs<bool> args(inputs, marks, marked_intervals);
1037  forward_loop(args);
1038 }
1039 
1040 void global::reverse(std::vector<bool> &marks) {
1041  intervals<Index> marked_intervals;
1042  ReverseArgs<bool> args(inputs, marks, marked_intervals);
1043  reverse_loop(args);
1044 }
1045 
1046 void global::forward_sub(std::vector<bool> &marks,
1047  const std::vector<bool> &node_filter) {
1048  intervals<Index> marked_intervals;
1049  ForwardArgs<bool> args(inputs, marks, marked_intervals);
1050  if (node_filter.size() == 0)
1051  forward_loop_subgraph(args);
1052  else
1053  forward_loop(args, 0, node_filter);
1054 }
1055 
1056 void global::reverse_sub(std::vector<bool> &marks,
1057  const std::vector<bool> &node_filter) {
1058  intervals<Index> marked_intervals;
1059  ReverseArgs<bool> args(inputs, marks, marked_intervals);
1060  if (node_filter.size() == 0)
1061  reverse_loop_subgraph(args);
1062  else
1063  reverse_loop(args, 0, node_filter);
1064 }
1065 
1066 void global::forward_dense(std::vector<bool> &marks) {
1067  intervals<Index> marked_intervals;
1068  ForwardArgs<bool> args(inputs, marks, marked_intervals);
1069  for (size_t i = 0; i < opstack.size(); i++) {
1070  opstack[i]->forward_incr_mark_dense(args);
1071  }
1072 }
1073 
1074 intervals<Index> global::updating_intervals() const {
1075  Dependencies dep;
1076  intervals<Index> marked_intervals;
1077  Args<> args(inputs);
1078  for (size_t i = 0; i < opstack.size(); i++) {
1079  if (opstack[i]->info().test(op_info::updating)) {
1080  dep.clear();
1081  opstack[i]->dependencies(args, dep);
1082 
1083  for (size_t i = 0; i < dep.I.size(); i++) {
1084  Index a = dep.I[i].first;
1085  Index b = dep.I[i].second;
1086  marked_intervals.insert(a, b);
1087  }
1088  }
1089  opstack[i]->increment(args.ptr);
1090  }
1091  return marked_intervals;
1092 }
1093 
1094 intervals<Index> global::updating_intervals_sub() const {
1095  Dependencies dep;
1096  intervals<Index> marked_intervals;
1097  Args<> args(inputs);
1098  subgraph_cache_ptr();
1099  for (size_t j = 0; j < subgraph_seq.size(); j++) {
1100  Index i = subgraph_seq[j];
1101  args.ptr = subgraph_ptr[i];
1102  if (opstack[i]->info().test(op_info::updating)) {
1103  dep.clear();
1104  opstack[i]->dependencies(args, dep);
1105 
1106  for (size_t i = 0; i < dep.I.size(); i++) {
1107  Index a = dep.I[i].first;
1108  Index b = dep.I[i].second;
1109  marked_intervals.insert(a, b);
1110  }
1111  }
1112  }
1113  return marked_intervals;
1114 }
1115 
1116 Replay &global::replay::value_inv(Index i) { return values[orig.inv_index[i]]; }
1117 
1118 Replay &global::replay::deriv_inv(Index i) { return derivs[orig.inv_index[i]]; }
1119 
1120 Replay &global::replay::value_dep(Index i) { return values[orig.dep_index[i]]; }
1121 
1122 Replay &global::replay::deriv_dep(Index i) { return derivs[orig.dep_index[i]]; }
1123 
1124 global::replay::replay(const global &orig, global &target)
1125  : orig(orig), target(target) {
1126  TMBAD_ASSERT(&orig != &target);
1127 }
1128 
1129 void global::replay::start() {
1130  parent_glob = get_glob();
1131  if (&target != parent_glob) target.ad_start();
1132  values = std::vector<Replay>(orig.values.begin(), orig.values.end());
1133 }
1134 
1135 void global::replay::stop() {
1136  if (&target != parent_glob) target.ad_stop();
1137  TMBAD_ASSERT(parent_glob == get_glob());
1138 }
1139 
1140 void global::replay::add_updatable_derivs(const intervals<Index> &I) {
1141  struct {
1142  Replay *p;
1143  void operator()(Index a, Index b) {
1144  Index n = b - a + 1;
1145  global::ZeroOp Z(n);
1146  Z(p + a, n);
1147  }
1148  } F = {derivs.data()};
1149  I.apply(F);
1150 }
1151 
1152 void global::replay::clear_deriv() {
1153  derivs.resize(values.size());
1154  std::fill(derivs.begin(), derivs.end(), Replay(0));
1155 
1156  if (orig.opstack.any.test(op_info::updating)) {
1157  intervals<Index> I = orig.updating_intervals();
1158  add_updatable_derivs(I);
1159  }
1160 }
1161 
1162 void global::replay::forward(bool inv_tags, bool dep_tags, Position start,
1163  const std::vector<bool> &node_filter) {
1164  TMBAD_ASSERT(&target == get_glob());
1165  if (inv_tags) {
1166  for (size_t i = 0; i < orig.inv_index.size(); i++)
1167  value_inv(i).Independent();
1168  }
1169  ForwardArgs<Replay> args(orig.inputs, values);
1170  if (node_filter.size() > 0) {
1171  TMBAD_ASSERT(node_filter.size() == orig.opstack.size());
1172  orig.forward_loop(args, start.node, node_filter);
1173  } else {
1174  orig.forward_loop(args, start.node);
1175  }
1176  if (dep_tags) {
1177  for (size_t i = 0; i < orig.dep_index.size(); i++) value_dep(i).Dependent();
1178  }
1179 }
1180 
1181 void global::replay::reverse(bool dep_tags, bool inv_tags, Position start,
1182  const std::vector<bool> &node_filter) {
1183  TMBAD_ASSERT(&target == get_glob());
1184  if (inv_tags) {
1185  for (size_t i = 0; i < orig.dep_index.size(); i++)
1186  deriv_dep(i).Independent();
1187  }
1188  ReverseArgs<Replay> args(orig.inputs, values, derivs);
1189  if (node_filter.size() > 0) {
1190  TMBAD_ASSERT(node_filter.size() == orig.opstack.size());
1191  orig.reverse_loop(args, start.node, node_filter);
1192  } else {
1193  orig.reverse_loop(args, start.node);
1194  }
1195 
1196  std::fill(derivs.begin(), derivs.begin() + start.ptr.second, Replay(0));
1197  if (dep_tags) {
1198  for (size_t i = 0; i < orig.inv_index.size(); i++) deriv_inv(i).Dependent();
1199  }
1200 }
1201 
1202 void global::replay::forward_sub() {
1203  ForwardArgs<Replay> args(orig.inputs, values);
1204  orig.forward_loop_subgraph(args);
1205 }
1206 
1207 void global::replay::reverse_sub() {
1208  ReverseArgs<Replay> args(orig.inputs, values, derivs);
1209  orig.reverse_loop_subgraph(args);
1210 }
1211 
1212 void global::replay::clear_deriv_sub() {
1213  orig.clear_array_subgraph(derivs);
1214 
1215  if (orig.opstack.any.test(op_info::updating)) {
1216  intervals<Index> I = orig.updating_intervals_sub();
1217  add_updatable_derivs(I);
1218  }
1219 }
1220 
1221 void global::forward_replay(bool inv_tags, bool dep_tags) {
1222  global new_glob;
1223  global::replay replay(*this, new_glob);
1224  replay.start();
1225  replay.forward(inv_tags, dep_tags);
1226  replay.stop();
1227  *this = new_glob;
1228 }
1229 
1230 void global::subgraph_cache_ptr() const {
1231  if (subgraph_ptr.size() == opstack.size()) return;
1232  TMBAD_ASSERT(subgraph_ptr.size() < opstack.size());
1233  if (subgraph_ptr.size() == 0) subgraph_ptr.push_back(IndexPair(0, 0));
1234  for (size_t i = subgraph_ptr.size(); i < opstack.size(); i++) {
1235  IndexPair ptr = subgraph_ptr[i - 1];
1236  opstack[i - 1]->increment(ptr);
1237  subgraph_ptr.push_back(ptr);
1238  }
1239 }
1240 
1241 void global::set_subgraph(const std::vector<bool> &marks, bool append) {
1242  std::vector<Index> v2o = var2op();
1243  if (!append) subgraph_seq.resize(0);
1244  Index previous = (Index)-1;
1245  for (size_t i = 0; i < marks.size(); i++) {
1246  if (marks[i] && (v2o[i] != previous)) {
1247  subgraph_seq.push_back(v2o[i]);
1248  previous = v2o[i];
1249  }
1250  }
1251 }
1252 
1253 void global::mark_subgraph(std::vector<bool> &marks) {
1254  TMBAD_ASSERT(marks.size() == values.size());
1255  clear_array_subgraph(marks, true);
1256 }
1257 
1258 void global::unmark_subgraph(std::vector<bool> &marks) {
1259  TMBAD_ASSERT(marks.size() == values.size());
1260  clear_array_subgraph(marks, false);
1261 }
1262 
1263 void global::subgraph_trivial() {
1264  subgraph_cache_ptr();
1265  subgraph_seq.resize(0);
1266  for (size_t i = 0; i < opstack.size(); i++) subgraph_seq.push_back(i);
1267 }
1268 
1269 void global::clear_deriv_sub() { clear_array_subgraph(derivs); }
1270 
1271 global global::extract_sub(std::vector<Index> &var_remap, global new_glob) {
1272  subgraph_cache_ptr();
1273  TMBAD_ASSERT(var_remap.size() == 0 || var_remap.size() == values.size());
1274  var_remap.resize(values.size(), 0);
1275  std::vector<bool> independent_variable = inv_marks();
1276  std::vector<bool> dependent_variable = dep_marks();
1277  ForwardArgs<Scalar> args(inputs, values, this);
1278  for (size_t j = 0; j < subgraph_seq.size(); j++) {
1279  Index i = subgraph_seq[j];
1280  args.ptr = subgraph_ptr[i];
1281 
1282  size_t nout = opstack[i]->output_size();
1283  for (size_t k = 0; k < nout; k++) {
1284  Index new_index = new_glob.values.size();
1285  Index old_index = args.output(k);
1286  var_remap[old_index] = new_index;
1287  new_glob.values.push_back(args.y(k));
1288  if (independent_variable[old_index]) {
1289  independent_variable[old_index] = false;
1290  }
1291  if (dependent_variable[old_index]) {
1292  dependent_variable[old_index] = false;
1293  }
1294  }
1295 
1296  size_t nin = opstack[i]->input_size();
1297  for (size_t k = 0; k < nin; k++) {
1298  new_glob.inputs.push_back(var_remap[args.input(k)]);
1299  }
1300 
1301  new_glob.opstack.push_back(opstack[i]->copy());
1302  }
1303 
1304  independent_variable.flip();
1305  dependent_variable.flip();
1306 
1307  for (size_t i = 0; i < inv_index.size(); i++) {
1308  Index old_var = inv_index[i];
1309  if (independent_variable[old_var])
1310  new_glob.inv_index.push_back(var_remap[old_var]);
1311  }
1312  for (size_t i = 0; i < dep_index.size(); i++) {
1313  Index old_var = dep_index[i];
1314  if (dependent_variable[old_var])
1315  new_glob.dep_index.push_back(var_remap[old_var]);
1316  }
1317  return new_glob;
1318 }
1319 
1320 void global::extract_sub_inplace(std::vector<bool> marks) {
1321  TMBAD_ASSERT(marks.size() == values.size());
1322  std::vector<Index> var_remap(values.size(), 0);
1323  std::vector<bool> independent_variable = inv_marks();
1324  std::vector<bool> dependent_variable = dep_marks();
1325  intervals<Index> marked_intervals;
1326  ForwardArgs<bool> args(inputs, marks, marked_intervals);
1327  size_t s = 0, s_input = 0;
1328  std::vector<bool> opstack_deallocate(opstack.size(), false);
1329 
1330  for (size_t i = 0; i < opstack.size(); i++) {
1331  op_info info = opstack[i]->info();
1332 
1333  size_t nout = opstack[i]->output_size();
1334  bool any_marked_output = info.test(op_info::elimination_protected);
1335  for (size_t j = 0; j < nout; j++) {
1336  any_marked_output |= args.y(j);
1337  }
1338  if (info.test(op_info::updating) && nout == 0) {
1339  Dependencies dep;
1340  opstack[i]->dependencies_updating(args, dep);
1341  any_marked_output |= dep.any(args.values);
1342  }
1343 
1344  if (any_marked_output) {
1345  for (size_t k = 0; k < nout; k++) {
1346  Index new_index = s;
1347  Index old_index = args.output(k);
1348  var_remap[old_index] = new_index;
1349  values[new_index] = values[old_index];
1350  if (independent_variable[old_index]) {
1351  independent_variable[old_index] = false;
1352  }
1353  if (dependent_variable[old_index]) {
1354  dependent_variable[old_index] = false;
1355  }
1356  s++;
1357  }
1358 
1359  size_t nin = opstack[i]->input_size();
1360  for (size_t k = 0; k < nin; k++) {
1361  inputs[s_input] = var_remap[args.input(k)];
1362  s_input++;
1363  }
1364  }
1365  opstack[i]->increment(args.ptr);
1366  if (!any_marked_output) {
1367  opstack_deallocate[i] = true;
1368  }
1369  }
1370 
1371  independent_variable.flip();
1372  dependent_variable.flip();
1373  std::vector<Index> new_inv_index;
1374  for (size_t i = 0; i < inv_index.size(); i++) {
1375  Index old_var = inv_index[i];
1376  if (independent_variable[old_var])
1377  new_inv_index.push_back(var_remap[old_var]);
1378  }
1379  inv_index = new_inv_index;
1380  std::vector<Index> new_dep_index;
1381  for (size_t i = 0; i < dep_index.size(); i++) {
1382  Index old_var = dep_index[i];
1383  if (dependent_variable[old_var])
1384  new_dep_index.push_back(var_remap[old_var]);
1385  }
1386  dep_index = new_dep_index;
1387 
1388  inputs.resize(s_input);
1389  values.resize(s);
1390  size_t k = 0;
1391  for (size_t i = 0; i < opstack.size(); i++) {
1392  if (opstack_deallocate[i]) {
1393  opstack[i]->deallocate();
1394  } else {
1395  opstack[k] = opstack[i];
1396  k++;
1397  }
1398  }
1399  opstack.resize(k);
1400 
1401  if (opstack.any.test(op_info::dynamic)) this->forward();
1402 }
1403 
1404 global global::extract_sub() {
1405  std::vector<Index> var_remap;
1406  return extract_sub(var_remap);
1407 }
1408 
1409 std::vector<Index> global::var2op() {
1410  std::vector<Index> var2op(values.size());
1411  Args<> args(inputs);
1412  size_t j = 0;
1413  for (size_t i = 0; i < opstack.size(); i++) {
1414  opstack[i]->increment(args.ptr);
1415  for (; j < (size_t)args.ptr.second; j++) {
1416  var2op[j] = i;
1417  }
1418  }
1419  return var2op;
1420 }
1421 
1422 std::vector<bool> global::var2op(const std::vector<bool> &values) {
1423  std::vector<bool> ans(opstack.size(), false);
1424  Args<> args(inputs);
1425  size_t j = 0;
1426  for (size_t i = 0; i < opstack.size(); i++) {
1427  opstack[i]->increment(args.ptr);
1428  for (; j < (size_t)args.ptr.second; j++) {
1429  ans[i] = ans[i] || values[j];
1430  }
1431  }
1432  return ans;
1433 }
1434 
1435 std::vector<Index> global::op2var(const std::vector<Index> &seq) {
1436  std::vector<bool> seq_mark = mark_space(opstack.size(), seq);
1437  std::vector<Index> ans;
1438  Args<> args(inputs);
1439  size_t j = 0;
1440  for (size_t i = 0; i < opstack.size(); i++) {
1441  opstack[i]->increment(args.ptr);
1442  for (; j < (size_t)args.ptr.second; j++) {
1443  if (seq_mark[i]) ans.push_back(j);
1444  }
1445  }
1446  return ans;
1447 }
1448 
1449 std::vector<bool> global::op2var(const std::vector<bool> &seq_mark) {
1450  std::vector<bool> ans(values.size());
1451  Args<> args(inputs);
1452  size_t j = 0;
1453  for (size_t i = 0; i < opstack.size(); i++) {
1454  opstack[i]->increment(args.ptr);
1455  for (; j < (size_t)args.ptr.second; j++) {
1456  if (seq_mark[i]) ans[j] = true;
1457  }
1458  }
1459  return ans;
1460 }
1461 
1462 std::vector<Index> global::op2idx(const std::vector<Index> &var_subset,
1463  Index NA) {
1464  std::vector<Index> v2o = var2op();
1465  std::vector<Index> op2idx(opstack.size(), NA);
1466  for (size_t i = var_subset.size(); i > 0;) {
1467  i--;
1468  op2idx[v2o[var_subset[i]]] = i;
1469  }
1470  return op2idx;
1471 }
1472 
1473 std::vector<bool> global::mark_space(size_t n, const std::vector<Index> ind) {
1474  std::vector<bool> mark(n, false);
1475  for (size_t i = 0; i < ind.size(); i++) {
1476  mark[ind[i]] = true;
1477  }
1478  return mark;
1479 }
1480 
1481 std::vector<bool> global::inv_marks() {
1482  return mark_space(values.size(), inv_index);
1483 }
1484 
1485 std::vector<bool> global::dep_marks() {
1486  return mark_space(values.size(), dep_index);
1487 }
1488 
1489 std::vector<bool> global::subgraph_marks() {
1490  return mark_space(opstack.size(), subgraph_seq);
1491 }
1492 
1493 global::append_edges::append_edges(size_t &i, size_t num_nodes,
1494  const std::vector<bool> &keep_var,
1495  std::vector<Index> &var2op,
1496  std::vector<IndexPair> &edges)
1497  : i(i),
1498  keep_var(keep_var),
1499  var2op(var2op),
1500  edges(edges),
1501  op_marks(num_nodes, false),
1502  pos(0) {}
1503 
1504 void global::append_edges::operator()(Index dep_j) {
1505  if (keep_var[dep_j]) {
1506  size_t k = var2op[dep_j];
1507  if (i != k && !op_marks[k]) {
1508  IndexPair edge;
1509 
1510  edge.first = k;
1511  edge.second = i;
1512  edges.push_back(edge);
1513  op_marks[k] = true;
1514  }
1515  }
1516 }
1517 
1518 void global::append_edges::start_iteration() { pos = edges.size(); }
1519 
1520 void global::append_edges::end_iteration() {
1521  size_t n = edges.size() - pos;
1522  for (size_t j = 0; j < n; j++) op_marks[edges[pos + j].first] = false;
1523 }
1524 
1525 graph global::build_graph(bool transpose, const std::vector<bool> &keep_var) {
1526  TMBAD_ASSERT(keep_var.size() == values.size());
1527 
1528  std::vector<Index> var2op = this->var2op();
1529 
1530  bool any_updating = false;
1531 
1532  Args<> args(inputs);
1533  std::vector<IndexPair> edges;
1534  Dependencies dep;
1535  size_t i = 0;
1536  append_edges F(i, opstack.size(), keep_var, var2op, edges);
1537  for (; i < opstack.size(); i++) {
1538  any_updating |= opstack[i]->info().test(op_info::updating);
1539  dep.clear();
1540  opstack[i]->dependencies(args, dep);
1541  F.start_iteration();
1542  dep.apply(F);
1543  F.end_iteration();
1544  opstack[i]->increment(args.ptr);
1545  }
1546  if (any_updating) {
1547  size_t begin = edges.size();
1548  i = 0;
1549  args = Args<>(inputs);
1550  for (; i < opstack.size(); i++) {
1551  dep.clear();
1552  opstack[i]->dependencies_updating(args, dep);
1553  F.start_iteration();
1554  dep.apply(F);
1555  F.end_iteration();
1556  opstack[i]->increment(args.ptr);
1557  }
1558  for (size_t j = begin; j < edges.size(); j++)
1559  std::swap(edges[j].first, edges[j].second);
1560  }
1561 
1562  if (transpose) {
1563  for (size_t j = 0; j < edges.size(); j++)
1564  std::swap(edges[j].first, edges[j].second);
1565  }
1566 
1567  graph G(opstack.size(), edges);
1568 
1569  for (size_t i = 0; i < inv_index.size(); i++)
1570  G.inv2op.push_back(var2op[inv_index[i]]);
1571  for (size_t i = 0; i < dep_index.size(); i++)
1572  G.dep2op.push_back(var2op[dep_index[i]]);
1573  return G;
1574 }
1575 
1576 graph global::forward_graph(std::vector<bool> keep_var) {
1577  if (keep_var.size() == 0) {
1578  keep_var.resize(values.size(), true);
1579  }
1580  TMBAD_ASSERT(values.size() == keep_var.size());
1581  return build_graph(false, keep_var);
1582 }
1583 
1584 graph global::reverse_graph(std::vector<bool> keep_var) {
1585  if (keep_var.size() == 0) {
1586  keep_var.resize(values.size(), true);
1587  }
1588  TMBAD_ASSERT(values.size() == keep_var.size());
1589  return build_graph(true, keep_var);
1590 }
1591 
1592 bool global::identical(const global &other) const {
1593  if (inv_index != other.inv_index) return false;
1594  ;
1595  if (dep_index != other.dep_index) return false;
1596  ;
1597  if (opstack.size() != other.opstack.size()) return false;
1598  ;
1599  for (size_t i = 0; i < opstack.size(); i++) {
1600  if (opstack[i]->identifier() != other.opstack[i]->identifier())
1601  return false;
1602  ;
1603  }
1604  if (inputs != other.inputs) return false;
1605  ;
1606  if (values.size() != other.values.size()) return false;
1607  ;
1608  OperatorPure *constant = getOperator<ConstOp>();
1609  IndexPair ptr(0, 0);
1610  for (size_t i = 0; i < opstack.size(); i++) {
1611  if (opstack[i] == constant) {
1612  if (values[ptr.second] != other.values[ptr.second]) return false;
1613  ;
1614  }
1615  opstack[i]->increment(ptr);
1616  }
1617 
1618  return true;
1619 }
1620 
1621 hash_t global::hash() const {
1622  hash_t h = 37;
1623 
1624  hash(h, inv_index.size());
1625  ;
1626  for (size_t i = 0; i < inv_index.size(); i++) hash(h, inv_index[i]);
1627  ;
1628  ;
1629  hash(h, dep_index.size());
1630  ;
1631  for (size_t i = 0; i < dep_index.size(); i++) hash(h, dep_index[i]);
1632  ;
1633  ;
1634  hash(h, opstack.size());
1635  ;
1636  for (size_t i = 0; i < opstack.size(); i++) hash(h, opstack[i]);
1637  ;
1638  ;
1639  hash(h, inputs.size());
1640  ;
1641  for (size_t i = 0; i < inputs.size(); i++) hash(h, inputs[i]);
1642  ;
1643  ;
1644  hash(h, values.size());
1645  ;
1646  OperatorPure *constant = getOperator<ConstOp>();
1647  IndexPair ptr(0, 0);
1648  for (size_t i = 0; i < opstack.size(); i++) {
1649  if (opstack[i] == constant) {
1650  hash(h, values[ptr.second]);
1651  ;
1652  }
1653  opstack[i]->increment(ptr);
1654  }
1655 
1656  return h;
1657 }
1658 
1659 std::vector<hash_t> global::hash_sweep(hash_config cfg) const {
1660  std::vector<Index> opstack_id;
1661  if (cfg.deterministic) {
1662  std::vector<size_t> tmp(opstack.size());
1663  for (size_t i = 0; i < tmp.size(); i++)
1664  tmp[i] = (size_t)opstack[i]->identifier();
1665  opstack_id = radix::first_occurance<Index>(tmp);
1666  hash_t spread = (hash_t(1) << (sizeof(hash_t) * 4)) - 1;
1667  for (size_t i = 0; i < opstack_id.size(); i++)
1668  opstack_id[i] = (opstack_id[i] + 1) * spread;
1669  }
1670 
1671  std::vector<hash_t> hash_vec(values.size(), 37);
1672  Dependencies dep;
1673  OperatorPure *inv = getOperator<InvOp>();
1674  OperatorPure *constant = getOperator<ConstOp>();
1675 
1676  if (cfg.strong_inv) {
1677  bool have_inv_seed = (cfg.inv_seed.size() > 0);
1678  if (have_inv_seed) {
1679  TMBAD_ASSERT(cfg.inv_seed.size() == inv_index.size());
1680  }
1681  for (size_t i = 0; i < inv_index.size(); i++) {
1682  hash_vec[inv_index[i]] += (have_inv_seed ? cfg.inv_seed[i] + 1 : (i + 1));
1683  }
1684  }
1685 
1686  Args<> args(inputs);
1687  IndexPair &ptr = args.ptr;
1688  for (size_t i = 0; i < opstack.size(); i++) {
1689  if (opstack[i] == inv) {
1690  opstack[i]->increment(ptr);
1691  continue;
1692  }
1693  dep.clear();
1694 
1695  opstack[i]->dependencies(args, dep);
1696 
1697  hash_t h = 37;
1698  for (size_t j = 0; j < dep.size(); j++) {
1699  if (j == 0)
1700  h = hash_vec[dep[0]];
1701  else
1702  hash(h, hash_vec[dep[j]]);
1703  ;
1704  }
1705 
1706  if (!cfg.deterministic) {
1707  hash(h, opstack[i]->identifier());
1708  ;
1709  } else {
1710  hash(h, opstack_id[i]);
1711  ;
1712  }
1713 
1714  if (opstack[i] == constant && cfg.strong_const) {
1715  hash(h, values[ptr.second]);
1716  ;
1717 
1718  hash(h, values[ptr.second] > 0);
1719  ;
1720  }
1721 
1722  size_t noutput = opstack[i]->output_size();
1723  for (size_t j = 0; j < noutput; j++) {
1724  hash_vec[ptr.second + j] = h + j * cfg.strong_output;
1725  }
1726 
1727  opstack[i]->increment(ptr);
1728  }
1729  if (!cfg.reduce) return hash_vec;
1730  std::vector<hash_t> ans(dep_index.size());
1731  for (size_t j = 0; j < dep_index.size(); j++) {
1732  ans[j] = hash_vec[dep_index[j]];
1733  }
1734  return ans;
1735 }
1736 
1737 std::vector<hash_t> global::hash_sweep(bool weak) const {
1738  hash_config cfg;
1739  cfg.strong_inv = !weak;
1740  cfg.strong_const = true;
1741  cfg.strong_output = true;
1742  cfg.reduce = weak;
1743  cfg.deterministic = false;
1744  return hash_sweep(cfg);
1745 }
1746 
1747 void global::eliminate() {
1748  this->shrink_to_fit();
1749 
1750  std::vector<bool> marks;
1751  marks.resize(values.size(), false);
1752 
1753  for (size_t i = 0; i < inv_index.size(); i++) marks[inv_index[i]] = true;
1754  for (size_t i = 0; i < dep_index.size(); i++) marks[dep_index[i]] = true;
1755 
1756  reverse(marks);
1757 
1758  if (false) {
1759  set_subgraph(marks);
1760 
1761  *this = extract_sub();
1762  }
1763  this->extract_sub_inplace(marks);
1764  this->shrink_to_fit();
1765 }
1766 
1767 global::print_config::print_config() : prefix(""), mark("*"), depth(0) {}
1768 
1769 void global::print(print_config cfg) {
1770  using std::endl;
1771  using std::left;
1772  using std::setw;
1773  IndexPair ptr(0, 0);
1774  std::vector<bool> sgm = subgraph_marks();
1775  bool have_subgraph = (subgraph_seq.size() > 0);
1776  int v = 0;
1777  print_config cfg2 = cfg;
1778  cfg2.depth--;
1779  cfg2.prefix = cfg.prefix + "##";
1780  Rcout << cfg.prefix;
1781  Rcout << setw(7) << "OpName:" << setw(7 + have_subgraph)
1782  << "Node:" << setw(13) << "Value:" << setw(13) << "Deriv:" << setw(13)
1783  << "Index:";
1784  Rcout << " "
1785  << "Inputs:";
1786  Rcout << endl;
1787  for (size_t i = 0; i < opstack.size(); i++) {
1788  Rcout << cfg.prefix;
1789  Rcout << setw(7) << opstack[i]->op_name();
1790  if (have_subgraph) {
1791  if (sgm[i])
1792  Rcout << cfg.mark;
1793  else
1794  Rcout << " ";
1795  }
1796  Rcout << setw(7) << i;
1797  int numvar = opstack[i]->output_size();
1798  for (int j = 0; j < numvar + (numvar == 0); j++) {
1799  if (j > 0) Rcout << cfg.prefix;
1800  Rcout << setw((7 + 7) * (j > 0) + 13);
1801  if (numvar > 0)
1802  Rcout << values[v];
1803  else
1804  Rcout << "";
1805  Rcout << setw(13);
1806  if (numvar > 0) {
1807  if (derivs.size() == values.size())
1808  Rcout << derivs[v];
1809  else
1810  Rcout << "NA";
1811  } else {
1812  Rcout << "";
1813  }
1814  Rcout << setw(13);
1815  if (numvar > 0) {
1816  Rcout << v;
1817  } else {
1818  Rcout << "";
1819  }
1820  if (j == 0) {
1821  IndexPair ptr_old = ptr;
1822  opstack[i]->increment(ptr);
1823  int ninput = ptr.first - ptr_old.first;
1824  for (int k = 0; k < ninput; k++) {
1825  if (k == 0) Rcout << " ";
1826  Rcout << " " << inputs[ptr_old.first + k];
1827  }
1828  }
1829  Rcout << endl;
1830  if (numvar > 0) {
1831  v++;
1832  }
1833  }
1834  if (cfg.depth > 0) opstack[i]->print(cfg2);
1835  }
1836 }
1837 
1838 void global::print() { this->print(print_config()); }
1839 
1840 global::DynamicInputOutputOperator::DynamicInputOutputOperator(Index ninput,
1841  Index noutput)
1842  : ninput_(ninput), noutput_(noutput) {}
1843 
1844 Index global::DynamicInputOutputOperator::input_size() const {
1845  return this->ninput_;
1846 }
1847 
1848 Index global::DynamicInputOutputOperator::output_size() const {
1849  return this->noutput_;
1850 }
1851 
1852 const char *global::InvOp::op_name() { return "InvOp"; }
1853 
1854 const char *global::DepOp::op_name() { return "DepOp"; }
1855 
1856 void global::ConstOp::forward(ForwardArgs<Replay> &args) {
1857  args.y(0).addToTape();
1858 }
1859 
1860 const char *global::ConstOp::op_name() { return "ConstOp"; }
1861 
1862 void global::ConstOp::forward(ForwardArgs<Writer> &args) {
1863  if (args.const_literals) {
1864  args.y(0) = args.y_const(0);
1865  }
1866 }
1867 
1868 global::DataOp::DataOp(Index n) { Base::noutput = n; }
1869 
1870 const char *global::DataOp::op_name() { return "DataOp"; }
1871 
1872 void global::DataOp::forward(ForwardArgs<Writer> &args) { TMBAD_ASSERT(false); }
1873 
1874 global::ZeroOp::ZeroOp(Index n) { Base::noutput = n; }
1875 
1876 const char *global::ZeroOp::op_name() { return "ZeroOp"; }
1877 
1878 void global::ZeroOp::forward(ForwardArgs<Writer> &args) { TMBAD_ASSERT(false); }
1879 
1880 void global::ZeroOp::operator()(Replay *x, Index n) {
1881  Complete<ZeroOp> Z(n);
1882  ad_segment y = Z(ad_segment());
1883  for (size_t i = 0; i < n; i++) x[i] = y[i];
1884 }
1885 
1886 global::NullOp::NullOp() {}
1887 
1888 const char *global::NullOp::op_name() { return "NullOp"; }
1889 
1890 global::NullOp2::NullOp2(Index ninput, Index noutput)
1891  : global::DynamicInputOutputOperator(ninput, noutput) {}
1892 
1893 const char *global::NullOp2::op_name() { return "NullOp2"; }
1894 
1895 global::RefOp::RefOp(global *glob, Index i) : glob(glob), i(i) {}
1896 
1897 void global::RefOp::forward(ForwardArgs<Scalar> &args) {
1898  args.y(0) = glob->values[i];
1899 }
1900 
1901 void global::RefOp::forward(ForwardArgs<Replay> &args) {
1902  if (get_glob() == this->glob) {
1903  ad_plain tmp;
1904  tmp.index = i;
1905  args.y(0) = tmp;
1906  } else {
1907  global::OperatorPure *pOp =
1908  get_glob()->getOperator<RefOp>(this->glob, this->i);
1909  args.y(0) =
1910  get_glob()->add_to_stack<RefOp>(pOp, std::vector<ad_plain>(0))[0];
1911  }
1912 }
1913 
1914 void global::RefOp::reverse(ReverseArgs<Replay> &args) {
1915  if (get_glob() == this->glob) {
1916  args.dx(0) += args.dy(0);
1917  }
1918 }
1919 
1920 const char *global::RefOp::op_name() { return "RefOp"; }
1921 
1922 OperatorPure *global::Fuse(OperatorPure *Op1, OperatorPure *Op2) {
1923  if (Op1 == Op2)
1924  return Op1->self_fuse();
1925  else
1926  return Op1->other_fuse(Op2);
1927 }
1928 
1929 void global::set_fuse(bool flag) { fuse = flag; }
1930 
1931 void global::add_to_opstack(OperatorPure *pOp) {
1932  if (fuse) {
1933  while (this->opstack.size() > 0) {
1934  OperatorPure *OpTry = this->Fuse(this->opstack.back(), pOp);
1935  if (OpTry == NULL) break;
1936 
1937  this->opstack.pop_back();
1938  pOp = OpTry;
1939  }
1940  }
1941 
1942  this->opstack.push_back(pOp);
1943 }
1944 
1945 bool global::ad_plain::initialized() const { return index != NA; }
1946 
1947 bool global::ad_plain::on_some_tape() const { return initialized(); }
1948 
1949 void global::ad_plain::addToTape() const { TMBAD_ASSERT(initialized()); }
1950 
1951 global *global::ad_plain::glob() const {
1952  return (on_some_tape() ? get_glob() : NULL);
1953 }
1954 
1955 void global::ad_plain::override_by(const ad_plain &x) const {}
1956 
1957 global::ad_plain::ad_plain() : index(NA) {}
1958 
1959 global::ad_plain::ad_plain(Scalar x) {
1960  *this = get_glob()->add_to_stack<ConstOp>(x);
1961 }
1962 
1963 global::ad_plain::ad_plain(ad_aug x) {
1964  x.addToTape();
1965  *this = x.taped_value;
1966 }
1967 
1968 Replay global::ad_plain::CopyOp::eval(Replay x0) { return x0.copy(); }
1969 
1970 const char *global::ad_plain::CopyOp::op_name() { return "CopyOp"; }
1971 
1972 ad_plain global::ad_plain::copy() const {
1973  ad_plain ans = get_glob()->add_to_stack<CopyOp>(*this);
1974  return ans;
1975 }
1976 
1977 Replay global::ad_plain::ValOp::eval(Replay x0) { return x0.copy0(); }
1978 
1979 void global::ad_plain::ValOp::dependencies(Args<> &args,
1980  Dependencies &dep) const {}
1981 
1982 const char *global::ad_plain::ValOp::op_name() { return "ValOp"; }
1983 
1984 ad_plain global::ad_plain::copy0() const {
1985  ad_plain ans = get_glob()->add_to_stack<ValOp>(*this);
1986  return ans;
1987 }
1988 
1989 ad_plain global::ad_plain::operator+(const ad_plain &other) const {
1990  ad_plain ans;
1991  ans = get_glob()->add_to_stack<AddOp>(*this, other);
1992  return ans;
1993 }
1994 
1995 ad_plain global::ad_plain::operator-(const ad_plain &other) const {
1996  ad_plain ans;
1997  ans = get_glob()->add_to_stack<SubOp>(*this, other);
1998  return ans;
1999 }
2000 
2001 ad_plain global::ad_plain::operator*(const ad_plain &other) const {
2002  ad_plain ans = get_glob()->add_to_stack<MulOp>(*this, other);
2003  return ans;
2004 }
2005 
2006 ad_plain global::ad_plain::operator*(const Scalar &other) const {
2007  ad_plain ans =
2008  get_glob()->add_to_stack<MulOp_<true, false> >(*this, ad_plain(other));
2009  return ans;
2010 }
2011 
2012 ad_plain global::ad_plain::operator/(const ad_plain &other) const {
2013  ad_plain ans = get_glob()->add_to_stack<DivOp>(*this, other);
2014  return ans;
2015 }
2016 
2017 const char *global::ad_plain::NegOp::op_name() { return "NegOp"; }
2018 
2019 ad_plain global::ad_plain::operator-() const {
2020  ad_plain ans = get_glob()->add_to_stack<NegOp>(*this);
2021  return ans;
2022 }
2023 
2024 ad_plain &global::ad_plain::operator+=(const ad_plain &other) {
2025  *this = *this + other;
2026  return *this;
2027 }
2028 
2029 ad_plain &global::ad_plain::operator-=(const ad_plain &other) {
2030  *this = *this - other;
2031  return *this;
2032 }
2033 
2034 ad_plain &global::ad_plain::operator*=(const ad_plain &other) {
2035  *this = *this * other;
2036  return *this;
2037 }
2038 
2039 ad_plain &global::ad_plain::operator/=(const ad_plain &other) {
2040  *this = *this / other;
2041  return *this;
2042 }
2043 
2044 void global::ad_plain::Dependent() {
2045  *this = get_glob()->add_to_stack<DepOp>(*this);
2046  get_glob()->dep_index.push_back(this->index);
2047 }
2048 
2049 void global::ad_plain::Independent() {
2050  Scalar val = (index == NA ? NAN : this->Value());
2051  *this = get_glob()->add_to_stack<InvOp>(val);
2052  get_glob()->inv_index.push_back(this->index);
2053 }
2054 
2055 Scalar &global::ad_plain::Value() { return get_glob()->values[index]; }
2056 
2057 Scalar global::ad_plain::Value() const { return get_glob()->values[index]; }
2058 
2059 Scalar global::ad_plain::Value(global *glob) const {
2060  return glob->values[index];
2061 }
2062 
2063 Scalar &global::ad_plain::Deriv() { return get_glob()->derivs[index]; }
2064 
2065 void global::ad_start() {
2066  TMBAD_ASSERT2(!in_use, "Tape already in use");
2067  TMBAD_ASSERT(parent_glob == NULL);
2068  parent_glob = global_ptr[TMBAD_THREAD_NUM];
2069  global_ptr[TMBAD_THREAD_NUM] = this;
2070  in_use = true;
2071 }
2072 
2073 void global::ad_stop() {
2074  TMBAD_ASSERT2(in_use, "Tape not in use");
2075  global_ptr[TMBAD_THREAD_NUM] = parent_glob;
2076  parent_glob = NULL;
2077  in_use = false;
2078 }
2079 
2080 void global::Independent(std::vector<ad_plain> &x) {
2081  for (size_t i = 0; i < x.size(); i++) {
2082  x[i].Independent();
2083  }
2084 }
2085 
2086 global::ad_segment::ad_segment() : n(0), c(0) {}
2087 
2088 global::ad_segment::ad_segment(ad_plain x, size_t n) : x(x), n(n), c(1) {}
2089 
2090 global::ad_segment::ad_segment(ad_aug x) : x(ad_plain(x)), n(1), c(1) {}
2091 
2092 global::ad_segment::ad_segment(Scalar x) : x(ad_plain(x)), n(1), c(1) {}
2093 
2094 global::ad_segment::ad_segment(Index idx, size_t n) : n(n) { x.index = idx; }
2095 
2096 global::ad_segment::ad_segment(ad_plain x, size_t r, size_t c)
2097  : x(x), n(r * c), c(c) {}
2098 
2099 global::ad_segment::ad_segment(Replay *x, size_t n, bool zero_check)
2100  : n(n), c(1) {
2101  if (zero_check && all_zero(x, n)) return;
2102  if (all_constant(x, n)) {
2103  global *glob = get_glob();
2104  size_t m = glob->values.size();
2105  Complete<DataOp> D(n);
2106  D(ad_segment());
2107  for (size_t i = 0; i < n; i++) glob->values[m + i] = x[i].Value();
2108  this->x.index = m;
2109  return;
2110  }
2111  if (!is_contiguous(x, n)) {
2112  size_t before = get_glob()->values.size();
2113  this->x = x[0].copy();
2114  for (size_t i = 1; i < n; i++) x[i].copy();
2115  size_t after = get_glob()->values.size();
2116  TMBAD_ASSERT2(after - before == n,
2117  "Each invocation of copy() should construct a new variable");
2118  return;
2119  }
2120  if (n > 0) this->x = x[0];
2121 }
2122 
2123 bool global::ad_segment::identicalZero() { return !x.initialized(); }
2124 
2125 bool global::ad_segment::all_on_active_tape(Replay *x, size_t n) {
2126  global *cur_glob = get_glob();
2127  for (size_t i = 0; i < n; i++) {
2128  bool ok = x[i].on_some_tape() && (x[i].glob() == cur_glob);
2129  if (!ok) return false;
2130  }
2131  return true;
2132 }
2133 
2134 bool global::ad_segment::is_contiguous(Replay *x, size_t n) {
2135  if (!all_on_active_tape(x, n)) return false;
2136  for (size_t i = 1; i < n; i++) {
2137  if (x[i].index() != x[i - 1].index() + 1) return false;
2138  }
2139  return true;
2140 }
2141 
2142 bool global::ad_segment::all_zero(Replay *x, size_t n) {
2143  for (size_t i = 0; i < n; i++) {
2144  if (!x[i].identicalZero()) return false;
2145  }
2146  return true;
2147 }
2148 
2149 bool global::ad_segment::all_constant(Replay *x, size_t n) {
2150  for (size_t i = 0; i < n; i++) {
2151  if (!x[i].constant()) return false;
2152  }
2153  return true;
2154 }
2155 
2156 size_t global::ad_segment::size() const { return n; }
2157 
2158 size_t global::ad_segment::rows() const { return n / c; }
2159 
2160 size_t global::ad_segment::cols() const { return c; }
2161 
2162 ad_plain global::ad_segment::operator[](size_t i) const {
2163  ad_plain ans;
2164  ans.index = x.index + i;
2165  return ans;
2166 }
2167 
2168 ad_plain global::ad_segment::offset() const { return x; }
2169 
2170 Index global::ad_segment::index() const { return x.index; }
2171 
2172 bool global::ad_aug::on_some_tape() const { return taped_value.initialized(); }
2173 
2175  return on_some_tape() && (this->glob() == get_glob());
2176 }
2177 
2178 bool global::ad_aug::ontape() const { return on_some_tape(); }
2179 
2180 bool global::ad_aug::constant() const { return !taped_value.initialized(); }
2181 
2182 Index global::ad_aug::index() const { return taped_value.index; }
2183 
2184 global *global::ad_aug::glob() const {
2185  return (on_some_tape() ? data.glob : NULL);
2186 }
2187 
2188 Scalar global::ad_aug::Value() const {
2189  if (on_some_tape())
2190  return taped_value.Value(this->data.glob);
2191  else
2192  return data.value;
2193 }
2194 
2196 
2197 global::ad_aug::ad_aug(Scalar x) { data.value = x; }
2198 
2199 global::ad_aug::ad_aug(ad_plain x) : taped_value(x) { data.glob = get_glob(); }
2200 
2202  if (on_some_tape()) {
2203  if (data.glob != get_glob()) {
2204  TMBAD_ASSERT2(in_context_stack(data.glob), "Variable not initialized?");
2205  global::OperatorPure *pOp =
2206  get_glob()->getOperator<RefOp>(data.glob, taped_value.index);
2207  this->taped_value =
2208  get_glob()->add_to_stack<RefOp>(pOp, std::vector<ad_plain>(0))[0];
2209 
2210  this->data.glob = get_glob();
2211  }
2212  return;
2213  }
2214  this->taped_value = ad_plain(data.value);
2215  this->data.glob = get_glob();
2216 }
2217 
2218 void global::ad_aug::override_by(const ad_plain &x) const {
2219  this->taped_value = x;
2220  this->data.glob = get_glob();
2221 }
2222 
2224  global *cur_glob = get_glob();
2225  while (cur_glob != NULL) {
2226  if (cur_glob == glob) return true;
2227  cur_glob = cur_glob->parent_glob;
2228  }
2229  return false;
2230 }
2231 
2233  if (on_active_tape()) {
2234  return taped_value.copy();
2235  } else {
2236  ad_aug cpy = *this;
2237  cpy.addToTape();
2238  return cpy;
2239  }
2240 }
2241 
2243  ad_aug cpy = *this;
2244  if (!cpy.on_active_tape()) {
2245  cpy.addToTape();
2246  }
2247  return cpy.taped_value.copy0();
2248 }
2249 
2251  return constant() && data.value == Scalar(0);
2252 }
2253 
2255  return constant() && data.value == Scalar(1);
2256 }
2257 
2258 bool global::ad_aug::bothConstant(const ad_aug &other) const {
2259  return constant() && other.constant();
2260 }
2261 
2262 bool global::ad_aug::identical(const ad_aug &other) const {
2263  if (constant() && other.constant()) return (data.value == other.data.value);
2264 
2265  if (glob() == other.glob())
2266  return (taped_value.index == other.taped_value.index);
2267  return false;
2268 }
2269 
2271  if (bothConstant(other)) return Scalar(this->data.value + other.data.value);
2272  if (this->identicalZero()) return other;
2273  if (other.identicalZero()) return *this;
2274  return ad_plain(*this) + ad_plain(other);
2275 }
2276 
2278  if (bothConstant(other)) return Scalar(this->data.value - other.data.value);
2279  if (other.identicalZero()) return *this;
2280  if (this->identicalZero()) return -other;
2281  if (this->identical(other)) return Scalar(0);
2282  return ad_plain(*this) - ad_plain(other);
2283 }
2284 
2286  if (this->constant()) return Scalar(-(this->data.value));
2287  return -ad_plain(*this);
2288 }
2289 
2291  if (bothConstant(other)) return Scalar(this->data.value * other.data.value);
2292  if (this->identicalZero()) return *this;
2293  if (other.identicalZero()) return other;
2294  if (this->identicalOne()) return other;
2295  if (other.identicalOne()) return *this;
2296  if (this->constant()) return ad_plain(other) * Scalar(this->data.value);
2297  if (other.constant()) return ad_plain(*this) * Scalar(other.data.value);
2298  return ad_plain(*this) * ad_plain(other);
2299 }
2300 
2302  if (bothConstant(other)) return Scalar(this->data.value / other.data.value);
2303  if (this->identicalZero()) return *this;
2304  if (other.identicalOne()) return *this;
2305  return ad_plain(*this) / ad_plain(other);
2306 }
2307 
2309  *this = *this + other;
2310  return *this;
2311 }
2312 
2314  *this = *this - other;
2315  return *this;
2316 }
2317 
2319  *this = *this * other;
2320  return *this;
2321 }
2322 
2324  *this = *this / other;
2325  return *this;
2326 }
2327 
2329  this->addToTape();
2330  taped_value.Dependent();
2331 }
2332 
2334  taped_value.Independent();
2335  taped_value.Value() = this->data.value;
2336  this->data.glob = get_glob();
2337 }
2338 
2339 Scalar &global::ad_aug::Value() {
2340  if (on_some_tape())
2341 
2342  return taped_value.Value();
2343  else
2344  return data.value;
2345 }
2346 
2347 Scalar &global::ad_aug::Deriv() { return taped_value.Deriv(); }
2348 
2349 void global::Independent(std::vector<ad_aug> &x) {
2350  for (size_t i = 0; i < x.size(); i++) {
2351  x[i].Independent();
2352  }
2353 }
2354 
2355 std::ostream &operator<<(std::ostream &os, const global::ad_plain &x) {
2356  os << x.Value();
2357  return os;
2358 }
2359 
2360 std::ostream &operator<<(std::ostream &os, const global::ad_aug &x) {
2361  os << "{";
2362  if (x.on_some_tape()) {
2363  os << "value=" << x.data.glob->values[x.taped_value.index] << ", ";
2364  os << "index=" << x.taped_value.index << ", ";
2365  os << "tape=" << x.data.glob;
2366  } else {
2367  os << "const=" << x.data.value;
2368  }
2369  os << "}";
2370  return os;
2371 }
2372 
2373 ad_plain_index::ad_plain_index(const Index &i) { this->index = i; }
2374 
2375 ad_plain_index::ad_plain_index(const ad_plain &x) : ad_plain(x) {}
2376 
2377 ad_aug_index::ad_aug_index(const Index &i) : ad_aug(ad_plain_index(i)) {}
2378 
2379 ad_aug_index::ad_aug_index(const ad_aug &x) : ad_aug(x) {}
2380 
2381 ad_aug_index::ad_aug_index(const ad_plain &x) : ad_aug(x) {}
2382 
2383 Scalar Value(Scalar x) { return x; }
2384 
2385 ad_aug operator+(const double &x, const ad_aug &y) { return ad_aug(x) + y; }
2386 
2387 ad_aug operator-(const double &x, const ad_aug &y) { return ad_aug(x) - y; }
2388 
2389 ad_aug operator*(const double &x, const ad_aug &y) { return ad_aug(x) * y; }
2390 
2391 ad_aug operator/(const double &x, const ad_aug &y) { return ad_aug(x) / y; }
2392 
2393 bool operator<(const double &x, const ad_adapt &y) { return x < y.Value(); }
2394 
2395 bool operator<=(const double &x, const ad_adapt &y) { return x <= y.Value(); }
2396 
2397 bool operator>(const double &x, const ad_adapt &y) { return x > y.Value(); }
2398 
2399 bool operator>=(const double &x, const ad_adapt &y) { return x >= y.Value(); }
2400 
2401 bool operator==(const double &x, const ad_adapt &y) { return x == y.Value(); }
2402 
2403 bool operator!=(const double &x, const ad_adapt &y) { return x != y.Value(); }
2404 
2405 Writer floor(const Writer &x) {
2406  return "floor"
2407  "(" +
2408  x + ")";
2409 }
2410 const char *FloorOp::op_name() { return "FloorOp"; }
2411 ad_plain floor(const ad_plain &x) {
2412  return get_glob()->add_to_stack<FloorOp>(x);
2413 }
2414 ad_aug floor(const ad_aug &x) {
2415  if (x.constant())
2416  return Scalar(floor(x.Value()));
2417  else
2418  return floor(ad_plain(x));
2419 }
2420 
2421 Writer ceil(const Writer &x) {
2422  return "ceil"
2423  "(" +
2424  x + ")";
2425 }
2426 const char *CeilOp::op_name() { return "CeilOp"; }
2427 ad_plain ceil(const ad_plain &x) { return get_glob()->add_to_stack<CeilOp>(x); }
2428 ad_aug ceil(const ad_aug &x) {
2429  if (x.constant())
2430  return Scalar(ceil(x.Value()));
2431  else
2432  return ceil(ad_plain(x));
2433 }
2434 
2435 Writer trunc(const Writer &x) {
2436  return "trunc"
2437  "(" +
2438  x + ")";
2439 }
2440 const char *TruncOp::op_name() { return "TruncOp"; }
2441 ad_plain trunc(const ad_plain &x) {
2442  return get_glob()->add_to_stack<TruncOp>(x);
2443 }
2444 ad_aug trunc(const ad_aug &x) {
2445  if (x.constant())
2446  return Scalar(trunc(x.Value()));
2447  else
2448  return trunc(ad_plain(x));
2449 }
2450 
2451 Writer round(const Writer &x) {
2452  return "round"
2453  "(" +
2454  x + ")";
2455 }
2456 const char *RoundOp::op_name() { return "RoundOp"; }
2457 ad_plain round(const ad_plain &x) {
2458  return get_glob()->add_to_stack<RoundOp>(x);
2459 }
2460 ad_aug round(const ad_aug &x) {
2461  if (x.constant())
2462  return Scalar(round(x.Value()));
2463  else
2464  return round(ad_plain(x));
2465 }
2466 
/**
 * Sign of `x` as a double.
 *
 * @return -1 for negative input, +1 for zero (either sign) and positive
 *         input, and 0 when neither comparison holds (NaN).
 */
double sign(const double &x) {
  if (x < 0) return -1.0;
  if (x >= 0) return 1.0;
  return 0.0;
}
2468 
2469 Writer sign(const Writer &x) {
2470  return "sign"
2471  "(" +
2472  x + ")";
2473 }
2474 const char *SignOp::op_name() { return "SignOp"; }
2475 ad_plain sign(const ad_plain &x) { return get_glob()->add_to_stack<SignOp>(x); }
2476 ad_aug sign(const ad_aug &x) {
2477  if (x.constant())
2478  return Scalar(sign(x.Value()));
2479  else
2480  return sign(ad_plain(x));
2481 }
2482 
/** Indicator of `x >= 0`: returns 1 when the comparison holds, else 0. */
double ge0(const double &x) {
  return (x >= 0) ? 1.0 : 0.0;
}
2484 
/** Indicator of `x < 0`: returns 1 when the comparison holds, else 0. */
double lt0(const double &x) {
  return (x < 0) ? 1.0 : 0.0;
}
2486 
2487 Writer ge0(const Writer &x) {
2488  return "ge0"
2489  "(" +
2490  x + ")";
2491 }
2492 const char *Ge0Op::op_name() { return "Ge0Op"; }
2493 ad_plain ge0(const ad_plain &x) { return get_glob()->add_to_stack<Ge0Op>(x); }
2494 ad_aug ge0(const ad_aug &x) {
2495  if (x.constant())
2496  return Scalar(ge0(x.Value()));
2497  else
2498  return ge0(ad_plain(x));
2499 }
2500 
2501 Writer lt0(const Writer &x) {
2502  return "lt0"
2503  "(" +
2504  x + ")";
2505 }
2506 const char *Lt0Op::op_name() { return "Lt0Op"; }
2507 ad_plain lt0(const ad_plain &x) { return get_glob()->add_to_stack<Lt0Op>(x); }
2508 ad_aug lt0(const ad_aug &x) {
2509  if (x.constant())
2510  return Scalar(lt0(x.Value()));
2511  else
2512  return lt0(ad_plain(x));
2513 }
2514 
2515 Writer fabs(const Writer &x) {
2516  return "fabs"
2517  "(" +
2518  x + ")";
2519 }
2520 void AbsOp::reverse(ReverseArgs<Scalar> &args) {
2521  typedef Scalar Type;
2522  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * sign(args.x(0));
2523 }
2524 const char *AbsOp::op_name() { return "AbsOp"; }
2525 ad_plain fabs(const ad_plain &x) { return get_glob()->add_to_stack<AbsOp>(x); }
2526 ad_aug fabs(const ad_aug &x) {
2527  if (x.constant())
2528  return Scalar(fabs(x.Value()));
2529  else
2530  return fabs(ad_plain(x));
2531 }
2532 ad_adapt fabs(const ad_adapt &x) { return ad_adapt(fabs(ad_aug(x))); }
2533 
2534 Writer sin(const Writer &x) {
2535  return "sin"
2536  "(" +
2537  x + ")";
2538 }
2539 void SinOp::reverse(ReverseArgs<Scalar> &args) {
2540  typedef Scalar Type;
2541  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * cos(args.x(0));
2542 }
2543 const char *SinOp::op_name() { return "SinOp"; }
2544 ad_plain sin(const ad_plain &x) { return get_glob()->add_to_stack<SinOp>(x); }
2545 ad_aug sin(const ad_aug &x) {
2546  if (x.constant())
2547  return Scalar(sin(x.Value()));
2548  else
2549  return sin(ad_plain(x));
2550 }
2551 ad_adapt sin(const ad_adapt &x) { return ad_adapt(sin(ad_aug(x))); }
2552 
2553 Writer cos(const Writer &x) {
2554  return "cos"
2555  "(" +
2556  x + ")";
2557 }
2558 void CosOp::reverse(ReverseArgs<Scalar> &args) {
2559  typedef Scalar Type;
2560  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * -sin(args.x(0));
2561 }
2562 const char *CosOp::op_name() { return "CosOp"; }
2563 ad_plain cos(const ad_plain &x) { return get_glob()->add_to_stack<CosOp>(x); }
2564 ad_aug cos(const ad_aug &x) {
2565  if (x.constant())
2566  return Scalar(cos(x.Value()));
2567  else
2568  return cos(ad_plain(x));
2569 }
2570 ad_adapt cos(const ad_adapt &x) { return ad_adapt(cos(ad_aug(x))); }
2571 
2572 Writer exp(const Writer &x) {
2573  return "exp"
2574  "(" +
2575  x + ")";
2576 }
2577 void ExpOp::reverse(ReverseArgs<Scalar> &args) {
2578  typedef Scalar Type;
2579  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * args.y(0);
2580 }
2581 const char *ExpOp::op_name() { return "ExpOp"; }
2582 ad_plain exp(const ad_plain &x) { return get_glob()->add_to_stack<ExpOp>(x); }
2583 ad_aug exp(const ad_aug &x) {
2584  if (x.constant())
2585  return Scalar(exp(x.Value()));
2586  else
2587  return exp(ad_plain(x));
2588 }
2589 ad_adapt exp(const ad_adapt &x) { return ad_adapt(exp(ad_aug(x))); }
2590 
2591 Writer log(const Writer &x) {
2592  return "log"
2593  "(" +
2594  x + ")";
2595 }
2596 void LogOp::reverse(ReverseArgs<Scalar> &args) {
2597  typedef Scalar Type;
2598  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * Type(1.) / args.x(0);
2599 }
2600 const char *LogOp::op_name() { return "LogOp"; }
2601 ad_plain log(const ad_plain &x) { return get_glob()->add_to_stack<LogOp>(x); }
2602 ad_aug log(const ad_aug &x) {
2603  if (x.constant())
2604  return Scalar(log(x.Value()));
2605  else
2606  return log(ad_plain(x));
2607 }
2608 ad_adapt log(const ad_adapt &x) { return ad_adapt(log(ad_aug(x))); }
2609 
2610 Writer sqrt(const Writer &x) {
2611  return "sqrt"
2612  "(" +
2613  x + ")";
2614 }
2615 void SqrtOp::reverse(ReverseArgs<Scalar> &args) {
2616  typedef Scalar Type;
2617  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * Type(0.5) / args.y(0);
2618 }
2619 const char *SqrtOp::op_name() { return "SqrtOp"; }
2620 ad_plain sqrt(const ad_plain &x) { return get_glob()->add_to_stack<SqrtOp>(x); }
2621 ad_aug sqrt(const ad_aug &x) {
2622  if (x.constant())
2623  return Scalar(sqrt(x.Value()));
2624  else
2625  return sqrt(ad_plain(x));
2626 }
2627 ad_adapt sqrt(const ad_adapt &x) { return ad_adapt(sqrt(ad_aug(x))); }
2628 
2629 Writer tan(const Writer &x) {
2630  return "tan"
2631  "(" +
2632  x + ")";
2633 }
2634 void TanOp::reverse(ReverseArgs<Scalar> &args) {
2635  typedef Scalar Type;
2636  if (args.dy(0) != Type(0))
2637  args.dx(0) += args.dy(0) * Type(1.) / (cos(args.x(0)) * cos(args.x(0)));
2638 }
2639 const char *TanOp::op_name() { return "TanOp"; }
2640 ad_plain tan(const ad_plain &x) { return get_glob()->add_to_stack<TanOp>(x); }
2641 ad_aug tan(const ad_aug &x) {
2642  if (x.constant())
2643  return Scalar(tan(x.Value()));
2644  else
2645  return tan(ad_plain(x));
2646 }
2647 ad_adapt tan(const ad_adapt &x) { return ad_adapt(tan(ad_aug(x))); }
2648 
2649 Writer sinh(const Writer &x) {
2650  return "sinh"
2651  "(" +
2652  x + ")";
2653 }
2654 void SinhOp::reverse(ReverseArgs<Scalar> &args) {
2655  typedef Scalar Type;
2656  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * cosh(args.x(0));
2657 }
2658 const char *SinhOp::op_name() { return "SinhOp"; }
2659 ad_plain sinh(const ad_plain &x) { return get_glob()->add_to_stack<SinhOp>(x); }
2660 ad_aug sinh(const ad_aug &x) {
2661  if (x.constant())
2662  return Scalar(sinh(x.Value()));
2663  else
2664  return sinh(ad_plain(x));
2665 }
2666 ad_adapt sinh(const ad_adapt &x) { return ad_adapt(sinh(ad_aug(x))); }
2667 
2668 Writer cosh(const Writer &x) {
2669  return "cosh"
2670  "(" +
2671  x + ")";
2672 }
2673 void CoshOp::reverse(ReverseArgs<Scalar> &args) {
2674  typedef Scalar Type;
2675  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * sinh(args.x(0));
2676 }
2677 const char *CoshOp::op_name() { return "CoshOp"; }
2678 ad_plain cosh(const ad_plain &x) { return get_glob()->add_to_stack<CoshOp>(x); }
2679 ad_aug cosh(const ad_aug &x) {
2680  if (x.constant())
2681  return Scalar(cosh(x.Value()));
2682  else
2683  return cosh(ad_plain(x));
2684 }
2685 ad_adapt cosh(const ad_adapt &x) { return ad_adapt(cosh(ad_aug(x))); }
2686 
2687 Writer tanh(const Writer &x) {
2688  return "tanh"
2689  "(" +
2690  x + ")";
2691 }
2692 void TanhOp::reverse(ReverseArgs<Scalar> &args) {
2693  typedef Scalar Type;
2694  if (args.dy(0) != Type(0))
2695  args.dx(0) += args.dy(0) * Type(1.) / (cosh(args.x(0)) * cosh(args.x(0)));
2696 }
2697 const char *TanhOp::op_name() { return "TanhOp"; }
2698 ad_plain tanh(const ad_plain &x) { return get_glob()->add_to_stack<TanhOp>(x); }
2699 ad_aug tanh(const ad_aug &x) {
2700  if (x.constant())
2701  return Scalar(tanh(x.Value()));
2702  else
2703  return tanh(ad_plain(x));
2704 }
2705 ad_adapt tanh(const ad_adapt &x) { return ad_adapt(tanh(ad_aug(x))); }
2706 
2707 Writer expm1(const Writer &x) {
2708  return "expm1"
2709  "(" +
2710  x + ")";
2711 }
2712 void Expm1::reverse(ReverseArgs<Scalar> &args) {
2713  typedef Scalar Type;
2714  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * args.y(0) + Type(1.);
2715 }
2716 const char *Expm1::op_name() { return "Expm1"; }
2717 ad_plain expm1(const ad_plain &x) { return get_glob()->add_to_stack<Expm1>(x); }
2718 ad_aug expm1(const ad_aug &x) {
2719  if (x.constant())
2720  return Scalar(expm1(x.Value()));
2721  else
2722  return expm1(ad_plain(x));
2723 }
2724 ad_adapt expm1(const ad_adapt &x) { return ad_adapt(expm1(ad_aug(x))); }
2725 
2726 Writer log1p(const Writer &x) {
2727  return "log1p"
2728  "(" +
2729  x + ")";
2730 }
2731 void Log1p::reverse(ReverseArgs<Scalar> &args) {
2732  typedef Scalar Type;
2733  if (args.dy(0) != Type(0))
2734  args.dx(0) += args.dy(0) * Type(1.) / (args.x(0) + Type(1.));
2735 }
2736 const char *Log1p::op_name() { return "Log1p"; }
2737 ad_plain log1p(const ad_plain &x) { return get_glob()->add_to_stack<Log1p>(x); }
2738 ad_aug log1p(const ad_aug &x) {
2739  if (x.constant())
2740  return Scalar(log1p(x.Value()));
2741  else
2742  return log1p(ad_plain(x));
2743 }
2744 ad_adapt log1p(const ad_adapt &x) { return ad_adapt(log1p(ad_aug(x))); }
2745 
2746 Writer asin(const Writer &x) {
2747  return "asin"
2748  "(" +
2749  x + ")";
2750 }
2751 void AsinOp::reverse(ReverseArgs<Scalar> &args) {
2752  typedef Scalar Type;
2753  if (args.dy(0) != Type(0))
2754  args.dx(0) +=
2755  args.dy(0) * Type(1.) / sqrt(Type(1.) - args.x(0) * args.x(0));
2756 }
2757 const char *AsinOp::op_name() { return "AsinOp"; }
2758 ad_plain asin(const ad_plain &x) { return get_glob()->add_to_stack<AsinOp>(x); }
2759 ad_aug asin(const ad_aug &x) {
2760  if (x.constant())
2761  return Scalar(asin(x.Value()));
2762  else
2763  return asin(ad_plain(x));
2764 }
2765 ad_adapt asin(const ad_adapt &x) { return ad_adapt(asin(ad_aug(x))); }
2766 
2767 Writer acos(const Writer &x) {
2768  return "acos"
2769  "(" +
2770  x + ")";
2771 }
2772 void AcosOp::reverse(ReverseArgs<Scalar> &args) {
2773  typedef Scalar Type;
2774  if (args.dy(0) != Type(0))
2775  args.dx(0) +=
2776  args.dy(0) * Type(-1.) / sqrt(Type(1.) - args.x(0) * args.x(0));
2777 }
2778 const char *AcosOp::op_name() { return "AcosOp"; }
2779 ad_plain acos(const ad_plain &x) { return get_glob()->add_to_stack<AcosOp>(x); }
2780 ad_aug acos(const ad_aug &x) {
2781  if (x.constant())
2782  return Scalar(acos(x.Value()));
2783  else
2784  return acos(ad_plain(x));
2785 }
2786 ad_adapt acos(const ad_adapt &x) { return ad_adapt(acos(ad_aug(x))); }
2787 
2788 Writer atan(const Writer &x) {
2789  return "atan"
2790  "(" +
2791  x + ")";
2792 }
2793 void AtanOp::reverse(ReverseArgs<Scalar> &args) {
2794  typedef Scalar Type;
2795  if (args.dy(0) != Type(0))
2796  args.dx(0) += args.dy(0) * Type(1.) / (Type(1.) + args.x(0) * args.x(0));
2797 }
2798 const char *AtanOp::op_name() { return "AtanOp"; }
2799 ad_plain atan(const ad_plain &x) { return get_glob()->add_to_stack<AtanOp>(x); }
2800 ad_aug atan(const ad_aug &x) {
2801  if (x.constant())
2802  return Scalar(atan(x.Value()));
2803  else
2804  return atan(ad_plain(x));
2805 }
2806 ad_adapt atan(const ad_adapt &x) { return ad_adapt(atan(ad_aug(x))); }
2807 
2808 Writer asinh(const Writer &x) {
2809  return "asinh"
2810  "(" +
2811  x + ")";
2812 }
2813 void AsinhOp::reverse(ReverseArgs<Scalar> &args) {
2814  typedef Scalar Type;
2815  if (args.dy(0) != Type(0))
2816  args.dx(0) +=
2817  args.dy(0) * Type(1.) / sqrt(args.x(0) * args.x(0) + Type(1.));
2818 }
2819 const char *AsinhOp::op_name() { return "AsinhOp"; }
2820 ad_plain asinh(const ad_plain &x) {
2821  return get_glob()->add_to_stack<AsinhOp>(x);
2822 }
2823 ad_aug asinh(const ad_aug &x) {
2824  if (x.constant())
2825  return Scalar(asinh(x.Value()));
2826  else
2827  return asinh(ad_plain(x));
2828 }
2829 ad_adapt asinh(const ad_adapt &x) { return ad_adapt(asinh(ad_aug(x))); }
2830 
2831 Writer acosh(const Writer &x) {
2832  return "acosh"
2833  "(" +
2834  x + ")";
2835 }
2836 void AcoshOp::reverse(ReverseArgs<Scalar> &args) {
2837  typedef Scalar Type;
2838  if (args.dy(0) != Type(0))
2839  args.dx(0) +=
2840  args.dy(0) * Type(1.) / sqrt(args.x(0) * args.x(0) - Type(1.));
2841 }
2842 const char *AcoshOp::op_name() { return "AcoshOp"; }
2843 ad_plain acosh(const ad_plain &x) {
2844  return get_glob()->add_to_stack<AcoshOp>(x);
2845 }
2846 ad_aug acosh(const ad_aug &x) {
2847  if (x.constant())
2848  return Scalar(acosh(x.Value()));
2849  else
2850  return acosh(ad_plain(x));
2851 }
2852 ad_adapt acosh(const ad_adapt &x) { return ad_adapt(acosh(ad_aug(x))); }
2853 
2854 Writer atanh(const Writer &x) {
2855  return "atanh"
2856  "(" +
2857  x + ")";
2858 }
2859 void AtanhOp::reverse(ReverseArgs<Scalar> &args) {
2860  typedef Scalar Type;
2861  if (args.dy(0) != Type(0))
2862  args.dx(0) += args.dy(0) * Type(1.) / (Type(1) - args.x(0) * args.x(0));
2863 }
2864 const char *AtanhOp::op_name() { return "AtanhOp"; }
2865 ad_plain atanh(const ad_plain &x) {
2866  return get_glob()->add_to_stack<AtanhOp>(x);
2867 }
2868 ad_aug atanh(const ad_aug &x) {
2869  if (x.constant())
2870  return Scalar(atanh(x.Value()));
2871  else
2872  return atanh(ad_plain(x));
2873 }
2874 ad_adapt atanh(const ad_adapt &x) { return ad_adapt(atanh(ad_aug(x))); }
2875 
2876 Writer pow(const Writer &x1, const Writer &x2) {
2877  return "pow"
2878  "(" +
2879  x1 + "," + x2 + ")";
2880 }
2881 const char *PowOp::op_name() { return "PowOp"; }
2882 ad_plain pow(const ad_plain &x1, const ad_plain &x2) {
2883  return get_glob()->add_to_stack<PowOp>(x1, x2);
2884 }
2885 ad_aug pow(const ad_aug &x1, const ad_aug &x2) {
2886  if (x1.constant() && x2.constant())
2887  return Scalar(pow(x1.Value(), x2.Value()));
2888  else
2889  return pow(ad_plain(x1), ad_plain(x2));
2890 }
2891 ad_adapt pow(const ad_adapt &x1, const ad_adapt &x2) {
2892  return ad_adapt(pow(ad_aug(x1), ad_aug(x2)));
2893 }
2894 
2895 Writer atan2(const Writer &x1, const Writer &x2) {
2896  return "atan2"
2897  "(" +
2898  x1 + "," + x2 + ")";
2899 }
2900 const char *Atan2::op_name() { return "Atan2"; }
2901 ad_plain atan2(const ad_plain &x1, const ad_plain &x2) {
2902  return get_glob()->add_to_stack<Atan2>(x1, x2);
2903 }
2904 ad_aug atan2(const ad_aug &x1, const ad_aug &x2) {
2905  if (x1.constant() && x2.constant())
2906  return Scalar(atan2(x1.Value(), x2.Value()));
2907  else
2908  return atan2(ad_plain(x1), ad_plain(x2));
2909 }
2910 ad_adapt atan2(const ad_adapt &x1, const ad_adapt &x2) {
2911  return ad_adapt(atan2(ad_aug(x1), ad_aug(x2)));
2912 }
2913 
2914 Writer max(const Writer &x1, const Writer &x2) {
2915  return "max"
2916  "(" +
2917  x1 + "," + x2 + ")";
2918 }
2919 const char *MaxOp::op_name() { return "MaxOp"; }
2920 ad_plain max(const ad_plain &x1, const ad_plain &x2) {
2921  return get_glob()->add_to_stack<MaxOp>(x1, x2);
2922 }
2923 ad_aug max(const ad_aug &x1, const ad_aug &x2) {
2924  if (x1.constant() && x2.constant())
2925  return Scalar(max(x1.Value(), x2.Value()));
2926  else
2927  return max(ad_plain(x1), ad_plain(x2));
2928 }
2929 ad_adapt max(const ad_adapt &x1, const ad_adapt &x2) {
2930  return ad_adapt(max(ad_aug(x1), ad_aug(x2)));
2931 }
2932 
2933 Writer min(const Writer &x1, const Writer &x2) {
2934  return "min"
2935  "(" +
2936  x1 + "," + x2 + ")";
2937 }
2938 const char *MinOp::op_name() { return "MinOp"; }
2939 ad_plain min(const ad_plain &x1, const ad_plain &x2) {
2940  return get_glob()->add_to_stack<MinOp>(x1, x2);
2941 }
2942 ad_aug min(const ad_aug &x1, const ad_aug &x2) {
2943  if (x1.constant() && x2.constant())
2944  return Scalar(min(x1.Value(), x2.Value()));
2945  else
2946  return min(ad_plain(x1), ad_plain(x2));
2947 }
2948 ad_adapt min(const ad_adapt &x1, const ad_adapt &x2) {
2949  return ad_adapt(min(ad_aug(x1), ad_aug(x2)));
2950 }
2951 void CondExpEqOp::forward(ForwardArgs<Scalar> &args) {
2952  if (args.x(0) == args.x(1)) {
2953  args.y(0) = args.x(2);
2954  } else {
2955  args.y(0) = args.x(3);
2956  }
2957 }
2958 void CondExpEqOp::reverse(ReverseArgs<Scalar> &args) {
2959  if (args.x(0) == args.x(1)) {
2960  args.dx(2) += args.dy(0);
2961  } else {
2962  args.dx(3) += args.dy(0);
2963  }
2964 }
2965 void CondExpEqOp::forward(ForwardArgs<Replay> &args) {
2966  args.y(0) = CondExpEq(args.x(0), args.x(1), args.x(2), args.x(3));
2967 }
2968 void CondExpEqOp::reverse(ReverseArgs<Replay> &args) {
2969  Replay zero(0);
2970  args.dx(2) += CondExpEq(args.x(0), args.x(1), args.dy(0), zero);
2971  args.dx(3) += CondExpEq(args.x(0), args.x(1), zero, args.dy(0));
2972 }
2973 void CondExpEqOp::forward(ForwardArgs<Writer> &args) {
2974  Writer w;
2975  w << "if (" << args.x(0) << "==" << args.x(1) << ") ";
2976  args.y(0) = args.x(2);
2977  w << " else ";
2978  args.y(0) = args.x(3);
2979 }
2980 void CondExpEqOp::reverse(ReverseArgs<Writer> &args) {
2981  Writer w;
2982  w << "if (" << args.x(0) << "==" << args.x(1) << ") ";
2983  args.dx(2) += args.dy(0);
2984  w << " else ";
2985  args.dx(3) += args.dy(0);
2986 }
2987 const char *CondExpEqOp::op_name() {
2988  return "CExp"
2989  "Eq";
2990 }
2991 Scalar CondExpEq(const Scalar &x0, const Scalar &x1, const Scalar &x2,
2992  const Scalar &x3) {
2993  if (x0 == x1)
2994  return x2;
2995  else
2996  return x3;
2997 }
2998 ad_plain CondExpEq(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
2999  const ad_plain &x3) {
3000  OperatorPure *pOp = get_glob()->getOperator<CondExpEqOp>();
3001  std::vector<ad_plain> x(4);
3002  x[0] = x0;
3003  x[1] = x1;
3004  x[2] = x2;
3005  x[3] = x3;
3006  std::vector<ad_plain> y = get_glob()->add_to_stack<CondExpEqOp>(pOp, x);
3007  return y[0];
3008 }
3009 ad_aug CondExpEq(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3010  const ad_aug &x3) {
3011  if (x0.constant() && x1.constant()) {
3012  if (x0.Value() == x1.Value())
3013  return x2;
3014  else
3015  return x3;
3016  } else {
3017  return CondExpEq(ad_plain(x0), ad_plain(x1), ad_plain(x2), ad_plain(x3));
3018  }
3019 }
3020 void CondExpNeOp::forward(ForwardArgs<Scalar> &args) {
3021  if (args.x(0) != args.x(1)) {
3022  args.y(0) = args.x(2);
3023  } else {
3024  args.y(0) = args.x(3);
3025  }
3026 }
3027 void CondExpNeOp::reverse(ReverseArgs<Scalar> &args) {
3028  if (args.x(0) != args.x(1)) {
3029  args.dx(2) += args.dy(0);
3030  } else {
3031  args.dx(3) += args.dy(0);
3032  }
3033 }
3034 void CondExpNeOp::forward(ForwardArgs<Replay> &args) {
3035  args.y(0) = CondExpNe(args.x(0), args.x(1), args.x(2), args.x(3));
3036 }
3037 void CondExpNeOp::reverse(ReverseArgs<Replay> &args) {
3038  Replay zero(0);
3039  args.dx(2) += CondExpNe(args.x(0), args.x(1), args.dy(0), zero);
3040  args.dx(3) += CondExpNe(args.x(0), args.x(1), zero, args.dy(0));
3041 }
3042 void CondExpNeOp::forward(ForwardArgs<Writer> &args) {
3043  Writer w;
3044  w << "if (" << args.x(0) << "!=" << args.x(1) << ") ";
3045  args.y(0) = args.x(2);
3046  w << " else ";
3047  args.y(0) = args.x(3);
3048 }
3049 void CondExpNeOp::reverse(ReverseArgs<Writer> &args) {
3050  Writer w;
3051  w << "if (" << args.x(0) << "!=" << args.x(1) << ") ";
3052  args.dx(2) += args.dy(0);
3053  w << " else ";
3054  args.dx(3) += args.dy(0);
3055 }
3056 const char *CondExpNeOp::op_name() {
3057  return "CExp"
3058  "Ne";
3059 }
3060 Scalar CondExpNe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3061  const Scalar &x3) {
3062  if (x0 != x1)
3063  return x2;
3064  else
3065  return x3;
3066 }
3067 ad_plain CondExpNe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3068  const ad_plain &x3) {
3069  OperatorPure *pOp = get_glob()->getOperator<CondExpNeOp>();
3070  std::vector<ad_plain> x(4);
3071  x[0] = x0;
3072  x[1] = x1;
3073  x[2] = x2;
3074  x[3] = x3;
3075  std::vector<ad_plain> y = get_glob()->add_to_stack<CondExpNeOp>(pOp, x);
3076  return y[0];
3077 }
3078 ad_aug CondExpNe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3079  const ad_aug &x3) {
3080  if (x0.constant() && x1.constant()) {
3081  if (x0.Value() != x1.Value())
3082  return x2;
3083  else
3084  return x3;
3085  } else {
3086  return CondExpNe(ad_plain(x0), ad_plain(x1), ad_plain(x2), ad_plain(x3));
3087  }
3088 }
3089 void CondExpGtOp::forward(ForwardArgs<Scalar> &args) {
3090  if (args.x(0) > args.x(1)) {
3091  args.y(0) = args.x(2);
3092  } else {
3093  args.y(0) = args.x(3);
3094  }
3095 }
3096 void CondExpGtOp::reverse(ReverseArgs<Scalar> &args) {
3097  if (args.x(0) > args.x(1)) {
3098  args.dx(2) += args.dy(0);
3099  } else {
3100  args.dx(3) += args.dy(0);
3101  }
3102 }
3103 void CondExpGtOp::forward(ForwardArgs<Replay> &args) {
3104  args.y(0) = CondExpGt(args.x(0), args.x(1), args.x(2), args.x(3));
3105 }
3106 void CondExpGtOp::reverse(ReverseArgs<Replay> &args) {
3107  Replay zero(0);
3108  args.dx(2) += CondExpGt(args.x(0), args.x(1), args.dy(0), zero);
3109  args.dx(3) += CondExpGt(args.x(0), args.x(1), zero, args.dy(0));
3110 }
3111 void CondExpGtOp::forward(ForwardArgs<Writer> &args) {
3112  Writer w;
3113  w << "if (" << args.x(0) << ">" << args.x(1) << ") ";
3114  args.y(0) = args.x(2);
3115  w << " else ";
3116  args.y(0) = args.x(3);
3117 }
3118 void CondExpGtOp::reverse(ReverseArgs<Writer> &args) {
3119  Writer w;
3120  w << "if (" << args.x(0) << ">" << args.x(1) << ") ";
3121  args.dx(2) += args.dy(0);
3122  w << " else ";
3123  args.dx(3) += args.dy(0);
3124 }
3125 const char *CondExpGtOp::op_name() {
3126  return "CExp"
3127  "Gt";
3128 }
3129 Scalar CondExpGt(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3130  const Scalar &x3) {
3131  if (x0 > x1)
3132  return x2;
3133  else
3134  return x3;
3135 }
3136 ad_plain CondExpGt(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3137  const ad_plain &x3) {
3138  OperatorPure *pOp = get_glob()->getOperator<CondExpGtOp>();
3139  std::vector<ad_plain> x(4);
3140  x[0] = x0;
3141  x[1] = x1;
3142  x[2] = x2;
3143  x[3] = x3;
3144  std::vector<ad_plain> y = get_glob()->add_to_stack<CondExpGtOp>(pOp, x);
3145  return y[0];
3146 }
3147 ad_aug CondExpGt(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3148  const ad_aug &x3) {
3149  if (x0.constant() && x1.constant()) {
3150  if (x0.Value() > x1.Value())
3151  return x2;
3152  else
3153  return x3;
3154  } else {
3155  return CondExpGt(ad_plain(x0), ad_plain(x1), ad_plain(x2), ad_plain(x3));
3156  }
3157 }
3158 void CondExpLtOp::forward(ForwardArgs<Scalar> &args) {
3159  if (args.x(0) < args.x(1)) {
3160  args.y(0) = args.x(2);
3161  } else {
3162  args.y(0) = args.x(3);
3163  }
3164 }
3165 void CondExpLtOp::reverse(ReverseArgs<Scalar> &args) {
3166  if (args.x(0) < args.x(1)) {
3167  args.dx(2) += args.dy(0);
3168  } else {
3169  args.dx(3) += args.dy(0);
3170  }
3171 }
3172 void CondExpLtOp::forward(ForwardArgs<Replay> &args) {
3173  args.y(0) = CondExpLt(args.x(0), args.x(1), args.x(2), args.x(3));
3174 }
3175 void CondExpLtOp::reverse(ReverseArgs<Replay> &args) {
3176  Replay zero(0);
3177  args.dx(2) += CondExpLt(args.x(0), args.x(1), args.dy(0), zero);
3178  args.dx(3) += CondExpLt(args.x(0), args.x(1), zero, args.dy(0));
3179 }
3180 void CondExpLtOp::forward(ForwardArgs<Writer> &args) {
3181  Writer w;
3182  w << "if (" << args.x(0) << "<" << args.x(1) << ") ";
3183  args.y(0) = args.x(2);
3184  w << " else ";
3185  args.y(0) = args.x(3);
3186 }
3187 void CondExpLtOp::reverse(ReverseArgs<Writer> &args) {
3188  Writer w;
3189  w << "if (" << args.x(0) << "<" << args.x(1) << ") ";
3190  args.dx(2) += args.dy(0);
3191  w << " else ";
3192  args.dx(3) += args.dy(0);
3193 }
3194 const char *CondExpLtOp::op_name() {
3195  return "CExp"
3196  "Lt";
3197 }
3198 Scalar CondExpLt(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3199  const Scalar &x3) {
3200  if (x0 < x1)
3201  return x2;
3202  else
3203  return x3;
3204 }
3205 ad_plain CondExpLt(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3206  const ad_plain &x3) {
3207  OperatorPure *pOp = get_glob()->getOperator<CondExpLtOp>();
3208  std::vector<ad_plain> x(4);
3209  x[0] = x0;
3210  x[1] = x1;
3211  x[2] = x2;
3212  x[3] = x3;
3213  std::vector<ad_plain> y = get_glob()->add_to_stack<CondExpLtOp>(pOp, x);
3214  return y[0];
3215 }
3216 ad_aug CondExpLt(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3217  const ad_aug &x3) {
3218  if (x0.constant() && x1.constant()) {
3219  if (x0.Value() < x1.Value())
3220  return x2;
3221  else
3222  return x3;
3223  } else {
3224  return CondExpLt(ad_plain(x0), ad_plain(x1), ad_plain(x2), ad_plain(x3));
3225  }
3226 }
3227 void CondExpGeOp::forward(ForwardArgs<Scalar> &args) {
3228  if (args.x(0) >= args.x(1)) {
3229  args.y(0) = args.x(2);
3230  } else {
3231  args.y(0) = args.x(3);
3232  }
3233 }
3234 void CondExpGeOp::reverse(ReverseArgs<Scalar> &args) {
3235  if (args.x(0) >= args.x(1)) {
3236  args.dx(2) += args.dy(0);
3237  } else {
3238  args.dx(3) += args.dy(0);
3239  }
3240 }
3241 void CondExpGeOp::forward(ForwardArgs<Replay> &args) {
3242  args.y(0) = CondExpGe(args.x(0), args.x(1), args.x(2), args.x(3));
3243 }
3244 void CondExpGeOp::reverse(ReverseArgs<Replay> &args) {
3245  Replay zero(0);
3246  args.dx(2) += CondExpGe(args.x(0), args.x(1), args.dy(0), zero);
3247  args.dx(3) += CondExpGe(args.x(0), args.x(1), zero, args.dy(0));
3248 }
3249 void CondExpGeOp::forward(ForwardArgs<Writer> &args) {
3250  Writer w;
3251  w << "if (" << args.x(0) << ">=" << args.x(1) << ") ";
3252  args.y(0) = args.x(2);
3253  w << " else ";
3254  args.y(0) = args.x(3);
3255 }
3256 void CondExpGeOp::reverse(ReverseArgs<Writer> &args) {
3257  Writer w;
3258  w << "if (" << args.x(0) << ">=" << args.x(1) << ") ";
3259  args.dx(2) += args.dy(0);
3260  w << " else ";
3261  args.dx(3) += args.dy(0);
3262 }
3263 const char *CondExpGeOp::op_name() {
3264  return "CExp"
3265  "Ge";
3266 }
3267 Scalar CondExpGe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3268  const Scalar &x3) {
3269  if (x0 >= x1)
3270  return x2;
3271  else
3272  return x3;
3273 }
3274 ad_plain CondExpGe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3275  const ad_plain &x3) {
3276  OperatorPure *pOp = get_glob()->getOperator<CondExpGeOp>();
3277  std::vector<ad_plain> x(4);
3278  x[0] = x0;
3279  x[1] = x1;
3280  x[2] = x2;
3281  x[3] = x3;
3282  std::vector<ad_plain> y = get_glob()->add_to_stack<CondExpGeOp>(pOp, x);
3283  return y[0];
3284 }
3285 ad_aug CondExpGe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3286  const ad_aug &x3) {
3287  if (x0.constant() && x1.constant()) {
3288  if (x0.Value() >= x1.Value())
3289  return x2;
3290  else
3291  return x3;
3292  } else {
3293  return CondExpGe(ad_plain(x0), ad_plain(x1), ad_plain(x2), ad_plain(x3));
3294  }
3295 }
3296 void CondExpLeOp::forward(ForwardArgs<Scalar> &args) {
3297  if (args.x(0) <= args.x(1)) {
3298  args.y(0) = args.x(2);
3299  } else {
3300  args.y(0) = args.x(3);
3301  }
3302 }
3303 void CondExpLeOp::reverse(ReverseArgs<Scalar> &args) {
3304  if (args.x(0) <= args.x(1)) {
3305  args.dx(2) += args.dy(0);
3306  } else {
3307  args.dx(3) += args.dy(0);
3308  }
3309 }
3310 void CondExpLeOp::forward(ForwardArgs<Replay> &args) {
3311  args.y(0) = CondExpLe(args.x(0), args.x(1), args.x(2), args.x(3));
3312 }
3313 void CondExpLeOp::reverse(ReverseArgs<Replay> &args) {
3314  Replay zero(0);
3315  args.dx(2) += CondExpLe(args.x(0), args.x(1), args.dy(0), zero);
3316  args.dx(3) += CondExpLe(args.x(0), args.x(1), zero, args.dy(0));
3317 }
3318 void CondExpLeOp::forward(ForwardArgs<Writer> &args) {
3319  Writer w;
3320  w << "if (" << args.x(0) << "<=" << args.x(1) << ") ";
3321  args.y(0) = args.x(2);
3322  w << " else ";
3323  args.y(0) = args.x(3);
3324 }
3325 void CondExpLeOp::reverse(ReverseArgs<Writer> &args) {
3326  Writer w;
3327  w << "if (" << args.x(0) << "<=" << args.x(1) << ") ";
3328  args.dx(2) += args.dy(0);
3329  w << " else ";
3330  args.dx(3) += args.dy(0);
3331 }
3332 const char *CondExpLeOp::op_name() {
3333  return "CExp"
3334  "Le";
3335 }
3336 Scalar CondExpLe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3337  const Scalar &x3) {
3338  if (x0 <= x1)
3339  return x2;
3340  else
3341  return x3;
3342 }
3343 ad_plain CondExpLe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3344  const ad_plain &x3) {
3345  OperatorPure *pOp = get_glob()->getOperator<CondExpLeOp>();
3346  std::vector<ad_plain> x(4);
3347  x[0] = x0;
3348  x[1] = x1;
3349  x[2] = x2;
3350  x[3] = x3;
3351  std::vector<ad_plain> y = get_glob()->add_to_stack<CondExpLeOp>(pOp, x);
3352  return y[0];
3353 }
3354 ad_aug CondExpLe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3355  const ad_aug &x3) {
3356  if (x0.constant() && x1.constant()) {
3357  if (x0.Value() <= x1.Value())
3358  return x2;
3359  else
3360  return x3;
3361  } else {
3362  return CondExpLe(ad_plain(x0), ad_plain(x1), ad_plain(x2), ad_plain(x3));
3363  }
3364 }
3365 
3366 Index SumOp::input_size() const { return n; }
3367 
3368 Index SumOp::output_size() const { return 1; }
3369 
3370 SumOp::SumOp(size_t n) : n(n) {}
3371 
3372 const char *SumOp::op_name() { return "SumOp"; }
3373 
3374 Index LogSpaceSumOp::input_size() const { return this->n; }
3375 
3376 Index LogSpaceSumOp::output_size() const { return 1; }
3377 
3378 LogSpaceSumOp::LogSpaceSumOp(size_t n) : n(n) {}
3379 
3380 void LogSpaceSumOp::forward(ForwardArgs<Scalar> &args) {
3381  Scalar Max = -INFINITY;
3382  for (size_t i = 0; i < n; i++) {
3383  if (Max < args.x(i)) Max = args.x(i);
3384  }
3385  args.y(0) = 0;
3386  for (size_t i = 0; i < n; i++) {
3387  args.y(0) += exp(args.x(i) - Max);
3388  }
3389  args.y(0) = Max + log(args.y(0));
3390 }
3391 
3392 void LogSpaceSumOp::forward(ForwardArgs<Replay> &args) {
3393  std::vector<ad_plain> x(input_size());
3394  for (Index i = 0; i < input_size(); i++) x[i] = args.x(i);
3395  args.y(0) = logspace_sum(x);
3396 }
3397 
3398 const char *LogSpaceSumOp::op_name() { return "LSSumOp"; }
3399 
3400 ad_plain logspace_sum(const std::vector<ad_plain> &x) {
3401  OperatorPure *pOp = get_glob()->getOperator<LogSpaceSumOp>(x.size());
3402  return get_glob()->add_to_stack<LogSpaceSumOp>(pOp, x)[0];
3403 }
3404 
3405 Index LogSpaceSumStrideOp::number_of_terms() const { return stride.size(); }
3406 
3407 Index LogSpaceSumStrideOp::input_size() const { return number_of_terms(); }
3408 
3409 Index LogSpaceSumStrideOp::output_size() const { return 1; }
3410 
3411 LogSpaceSumStrideOp::LogSpaceSumStrideOp(std::vector<Index> stride, size_t n)
3412  : stride(stride), n(n) {}
3413 
3414 void LogSpaceSumStrideOp::forward(ForwardArgs<Scalar> &args) {
3415  Scalar Max = -INFINITY;
3416 
3417  size_t m = stride.size();
3418  std::vector<Scalar *> wrk(m);
3419  Scalar **px = &(wrk[0]);
3420  for (size_t i = 0; i < m; i++) {
3421  px[i] = args.x_ptr(i);
3422  }
3423 
3424  for (size_t i = 0; i < n; i++) {
3425  Scalar s = rowsum(px, i);
3426  if (Max < s) Max = s;
3427  }
3428 
3429  args.y(0) = 0;
3430  for (size_t i = 0; i < n; i++) {
3431  Scalar s = rowsum(px, i);
3432  args.y(0) += exp(s - Max);
3433  }
3434  args.y(0) = Max + log(args.y(0));
3435 }
3436 
3437 void LogSpaceSumStrideOp::forward(ForwardArgs<Replay> &args) {
3438  std::vector<ad_plain> x(input_size());
3439  for (Index i = 0; i < input_size(); i++) x[i] = args.x(i);
3440  args.y(0) = logspace_sum_stride(x, stride, n);
3441 }
3442 
3443 void LogSpaceSumStrideOp::dependencies(Args<> &args, Dependencies &dep) const {
3444  for (size_t j = 0; j < (size_t)number_of_terms(); j++) {
3445  size_t K = n * stride[j];
3446  dep.add_segment(args.input(j), K);
3447  }
3448 }
3449 
3450 const char *LogSpaceSumStrideOp::op_name() { return "LSStride"; }
3451 
3452 void LogSpaceSumStrideOp::forward(ForwardArgs<Writer> &args) {
3453  TMBAD_ASSERT(false);
3454 }
3455 
3456 void LogSpaceSumStrideOp::reverse(ReverseArgs<Writer> &args) {
3457  TMBAD_ASSERT(false);
3458 }
3459 
3460 ad_plain logspace_sum_stride(const std::vector<ad_plain> &x,
3461  const std::vector<Index> &stride, size_t n) {
3462  TMBAD_ASSERT(x.size() == stride.size());
3463  OperatorPure *pOp = get_glob()->getOperator<LogSpaceSumStrideOp>(stride, n);
3464  return get_glob()->add_to_stack<LogSpaceSumStrideOp>(pOp, x)[0];
3465 }
3466 } // namespace TMBad
3467 // Autogenerated - do not edit by hand !
3468 #include "graph2dot.hpp"
3469 namespace TMBad {
3470 
3471 void graph2dot(global glob, graph G, bool show_id, std::ostream &cout) {
3472  cout << "digraph graphname {\n";
3473  for (size_t i = 0; i < glob.opstack.size(); i++) {
3474  if (!show_id)
3475  cout << i << " [label=\"" << glob.opstack[i]->op_name() << "\"];\n";
3476  else
3477  cout << i << " [label=\"" << glob.opstack[i]->op_name() << " " << i
3478  << "\"];\n";
3479  }
3480  for (size_t node = 0; node < G.num_nodes(); node++) {
3481  for (size_t k = 0; k < G.num_neighbors(node); k++) {
3482  cout << node << " -> " << G.neighbors(node)[k] << ";\n";
3483  }
3484  }
3485  for (size_t i = 0; i < glob.subgraph_seq.size(); i++) {
3486  size_t node = glob.subgraph_seq[i];
3487  cout << node << " [style=\"filled\"];\n";
3488  }
3489 
3490  std::vector<Index> v2o = glob.var2op();
3491 
3492  cout << "{rank=same;";
3493  for (size_t i = 0; i < glob.inv_index.size(); i++) {
3494  cout << v2o[glob.inv_index[i]] << ";";
3495  }
3496  cout << "}\n";
3497 
3498  cout << "{rank=same;";
3499  for (size_t i = 0; i < glob.dep_index.size(); i++) {
3500  cout << v2o[glob.dep_index[i]] << ";";
3501  }
3502  cout << "}\n";
3503 
3504  cout << "}\n";
3505 }
3506 
3507 void graph2dot(global glob, bool show_id, std::ostream &cout) {
3508  graph G = glob.forward_graph();
3509  graph2dot(glob, G, show_id, cout);
3510 }
3511 
3512 void graph2dot(const char *filename, global glob, graph G, bool show_id) {
3513  std::ofstream myfile;
3514  myfile.open(filename);
3515  graph2dot(glob, G, show_id, myfile);
3516  myfile.close();
3517 }
3518 
3519 void graph2dot(const char *filename, global glob, bool show_id) {
3520  std::ofstream myfile;
3521  myfile.open(filename);
3522  graph2dot(glob, show_id, myfile);
3523  myfile.close();
3524 }
3525 } // namespace TMBad
3526 // Autogenerated - do not edit by hand !
3527 #include "graph_transform.hpp"
3528 namespace TMBad {
3529 
3530 std::vector<size_t> which(const std::vector<bool> &x) {
3531  return which<size_t>(x);
3532 }
3533 
/** \brief Product of all elements of `x`; an empty vector gives 1. */
size_t prod_int(const std::vector<size_t> &x) {
  size_t product = 1;
  for (std::vector<size_t>::const_iterator it = x.begin(); it != x.end();
       ++it) {
    product *= *it;
  }
  return product;
}
3539 
3540 std::vector<bool> reverse_boundary(global &glob,
3541  const std::vector<bool> &vars) {
3542  std::vector<bool> boundary(vars);
3543  std::vector<bool> node_filter = glob.var2op(vars);
3544  glob.reverse_sub(boundary, node_filter);
3545 
3546  for (size_t i = 0; i < vars.size(); i++) boundary[i] = boundary[i] ^ vars[i];
3547  return boundary;
3548 }
3549 
3550 std::vector<Index> get_accumulation_tree(global &glob, bool boundary) {
3551  std::vector<OperatorPure *> &opstack = glob.opstack;
3552 
3553  std::vector<bool> node_subset(opstack.size(), false);
3554  for (size_t i = 0; i < opstack.size(); i++) {
3555  node_subset[i] = opstack[i]->info().test(op_info::is_linear);
3556  }
3557 
3558  node_subset.flip();
3559 
3560  std::vector<bool> var_subset = glob.op2var(node_subset);
3561 
3562  glob.reverse(var_subset);
3563 
3564  var_subset.flip();
3565 
3566  if (boundary) var_subset = reverse_boundary(glob, var_subset);
3567 
3568  node_subset = glob.var2op(var_subset);
3569 
3570  return which<Index>(node_subset);
3571 }
3572 
3573 std::vector<Index> find_op_by_name(global &glob, const char *name) {
3574  std::vector<Index> ans;
3575  std::vector<OperatorPure *> &opstack = glob.opstack;
3576  for (size_t i = 0; i < opstack.size(); i++) {
3577  if (!strcmp(opstack[i]->op_name(), name)) {
3578  ans.push_back(i);
3579  }
3580  }
3581  return ans;
3582 }
3583 
3584 std::vector<Index> substitute(global &glob, const std::vector<Index> &seq,
3585  bool inv_tags, bool dep_tags) {
3586  std::vector<OperatorPure *> &opstack = glob.opstack;
3587  std::vector<Index> seq2(seq);
3588  make_space_inplace(opstack, seq2);
3589  OperatorPure *invop = glob.getOperator<global::InvOp>();
3590  for (size_t i = 0; i < seq2.size(); i++) {
3591  OperatorPure *op = opstack[seq2[i]];
3592  if (inv_tags) TMBAD_ASSERT(op != invop);
3593  size_t nin = op->input_size();
3594  size_t nou = op->output_size();
3595  opstack[seq2[i] - 1] = glob.getOperator<global::NullOp2>(nin, 0);
3596  opstack[seq2[i]] = glob.getOperator<global::NullOp2>(0, nou);
3597  op->deallocate();
3598  }
3600  std::vector<Index> new_inv = glob.op2var(seq2);
3601  if (!inv_tags) glob.inv_index.resize(0);
3602  if (!dep_tags) glob.dep_index.resize(0);
3603  glob.inv_index.insert(glob.inv_index.end(), new_inv.begin(), new_inv.end());
3604  return new_inv;
3605 }
3606 
3607 std::vector<Index> substitute(global &glob, const char *name, bool inv_tags,
3608  bool dep_tags) {
3609  std::vector<Index> seq = find_op_by_name(glob, name);
3610  return substitute(glob, seq, inv_tags, dep_tags);
3611 }
3612 
3614  global glob_tree = glob;
3615 
3616  std::vector<Index> boundary = get_accumulation_tree(glob, true);
3617 
3618  substitute(glob_tree, boundary, false, true);
3619  glob_tree.eliminate();
3620 
3621  size_t n = glob_tree.inv_index.size();
3622 
3623  std::vector<Scalar> x0(n);
3624  for (size_t i = 0; i < n; i++) x0[i] = glob_tree.value_inv(i);
3625  glob_tree.forward();
3626  glob_tree.clear_deriv();
3627  glob_tree.deriv_dep(0) = 1;
3628  glob_tree.reverse();
3629  Scalar V = glob_tree.value_dep(0);
3630  std::vector<Scalar> J(n);
3631  for (size_t i = 0; i < n; i++) J[i] = glob_tree.deriv_inv(i);
3632 
3633  for (size_t i = 0; i < n; i++) V -= J[i] * x0[i];
3634 
3635  std::vector<Index> vars = glob.op2var(boundary);
3636  glob.dep_index.resize(0);
3637  glob.ad_start();
3638  std::vector<ad_aug_index> res(vars.begin(), vars.end());
3639  for (size_t i = 0; i < vars.size(); i++) {
3640  res[i] = res[i] * J[i];
3641  if (i == 0) res[i] += V;
3642  if (!sum_) res[i].Dependent();
3643  }
3644  if (sum_) {
3645  ad_aug sum_res = sum(res);
3646  sum_res.Dependent();
3647  }
3648  glob.ad_stop();
3649  glob.eliminate();
3650  return glob;
3651 }
3652 
3653 void aggregate(global &glob, int sign) {
3654  TMBAD_ASSERT((sign == 1) || (sign == -1));
3655  glob.ad_start();
3656  std::vector<ad_aug_index> x(glob.dep_index.begin(), glob.dep_index.end());
3657  ad_aug y = 0;
3658  for (size_t i = 0; i < x.size(); i++) y += x[i];
3659  if (sign < 0) y = -y;
3660  glob.dep_index.resize(0);
3661  y.Dependent();
3662  glob.ad_stop();
3663 }
3664 
3665 old_state::old_state(global &glob) : glob(glob) {
3666  dep_index = glob.dep_index;
3667  opstack_size = glob.opstack.size();
3668 }
3669 
3670 void old_state::restore() {
3671  glob.dep_index = dep_index;
3672  while (glob.opstack.size() > opstack_size) {
3673  Index input_size = glob.opstack.back()->input_size();
3674  Index output_size = glob.opstack.back()->output_size();
3675  glob.inputs.resize(glob.inputs.size() - input_size);
3676  glob.values.resize(glob.values.size() - output_size);
3677  glob.opstack.back()->deallocate();
3678  glob.opstack.pop_back();
3679  }
3680 }
3681 
3682 term_info::term_info(global &glob, bool do_init) : glob(glob) {
3683  if (do_init) initialize();
3684 }
3685 
3686 void term_info::initialize(std::vector<Index> inv_remap) {
3687  if (inv_remap.size() == 0) inv_remap.resize(glob.inv_index.size(), 0);
3688  inv_remap = radix::factor<Index>(inv_remap);
3689  std::vector<Index> remap = remap_identical_sub_expressions(glob, inv_remap);
3690  std::vector<Index> term_ids = subset(remap, glob.dep_index);
3691  id = radix::factor<Index>(term_ids);
3692  Index max_id = *std::max_element(id.begin(), id.end());
3693  count.resize(max_id + 1, 0);
3694  for (size_t i = 0; i < id.size(); i++) {
3695  count[id[i]]++;
3696  }
3697 }
3698 
3699 gk_config::gk_config()
3700  : debug(false), adaptive(false), nan2zero(true), ytol(1e-2), dx(1) {}
3701 
3703  size_t count = 1;
3704  for (size_t i = 0; i < bound.size(); i++)
3705  if (mask_[i]) count *= bound[i];
3706  return count;
3707 }
3708 
3709 multivariate_index::multivariate_index(size_t bound_, size_t dim, bool flag)
3710  : pointer(0) {
3711  bound.resize(dim, bound_);
3712  x.resize(dim, 0);
3713  mask_.resize(dim, flag);
3714 }
3715 
3716 multivariate_index::multivariate_index(std::vector<size_t> bound, bool flag)
3717  : pointer(0), bound(bound) {
3718  x.resize(bound.size(), 0);
3719  mask_.resize(bound.size(), flag);
3720 }
3721 
3722 void multivariate_index::flip() { mask_.flip(); }
3723 
3725  size_t N = 1;
3726  for (size_t i = 0; i < x.size(); i++) {
3727  if (mask_[i]) {
3728  if (x[i] < bound[i] - 1) {
3729  x[i]++;
3730  pointer += N;
3731  break;
3732  } else {
3733  x[i] = 0;
3734  pointer -= (bound[i] - 1) * N;
3735  }
3736  }
3737  N *= bound[i];
3738  }
3739  return *this;
3740 }
3741 
3742 multivariate_index::operator size_t() { return pointer; }
3743 
3744 size_t multivariate_index::index(size_t i) { return x[i]; }
3745 
3746 std::vector<size_t> multivariate_index::index() { return x; }
3747 
3748 std::vector<bool>::reference multivariate_index::mask(size_t i) {
3749  return mask_[i];
3750 }
3751 
3752 void multivariate_index::set_mask(const std::vector<bool> &mask) {
3753  TMBAD_ASSERT(mask.size() == mask_.size());
3754  mask_ = mask;
3755 }
3756 
3757 size_t clique::clique_size() { return indices.size(); }
3758 
3759 clique::clique() {}
3760 
3761 void clique::subset_inplace(const std::vector<bool> &mask) {
3762  indices = subset(indices, mask);
3763  dim = subset(dim, mask);
3764 }
3765 
3766 void clique::logsum_init() { logsum.resize(prod_int(dim)); }
3767 
3768 bool clique::empty() const { return (indices.size() == 0); }
3769 
3770 bool clique::contains(Index i) {
3771  bool ans = false;
3772  for (size_t j = 0; j < indices.size(); j++) ans |= (i == indices[j]);
3773  return ans;
3774 }
3775 
3776 void clique::get_stride(const clique &super, Index ind,
3777  std::vector<ad_plain> &offset, Index &stride) {
3778  stride = 1;
3779  for (size_t k = 0; (k < clique_size()) && (indices[k] < ind); k++) {
3780  stride *= dim[k];
3781  }
3782 
3783  multivariate_index mv(super.dim);
3784  size_t nx = mv.count();
3785  std::vector<bool> mask = lmatch(super.indices, this->indices);
3786  mask.flip();
3787  mv.set_mask(mask);
3788  std::vector<ad_plain> x(nx);
3789  size_t xa_count = mv.count();
3790  mv.flip();
3791  size_t xi_count = mv.count();
3792  mv.flip();
3793  TMBAD_ASSERT(x.size() == xa_count * xi_count);
3794  for (size_t i = 0; i < xa_count; i++, ++mv) {
3795  mv.flip();
3796  for (size_t j = 0; j < xi_count; j++, ++mv) {
3797  TMBAD_ASSERT(logsum[j].on_some_tape());
3798  x[mv] = logsum[j];
3799  }
3800  mv.flip();
3801  }
3802 
3803  mv = multivariate_index(super.dim);
3804  mask = lmatch(super.indices, std::vector<Index>(1, ind));
3805  mask.flip();
3806  mv.set_mask(mask);
3807 
3808  xa_count = mv.count();
3809  offset.resize(xa_count);
3810  for (size_t i = 0; i < xa_count; i++, ++mv) {
3811  offset[i] = x[mv];
3812  }
3813 }
3814 
3815 sr_grid::sr_grid() {}
3816 
3817 sr_grid::sr_grid(Scalar a, Scalar b, size_t n) : x(n), w(n) {
3818  Scalar h = (b - a) / n;
3819  for (size_t i = 0; i < n; i++) {
3820  x[i] = a + h / 2 + i * h;
3821  w[i] = h;
3822  }
3823 }
3824 
3825 sr_grid::sr_grid(size_t n) {
3826  for (size_t i = 0; i < n; i++) {
3827  x[i] = i;
3828  w[i] = 1. / (double)n;
3829  }
3830 }
3831 
3832 size_t sr_grid::size() { return x.size(); }
3833 
3834 ad_plain sr_grid::logw_offset() {
3835  if (logw.size() != w.size()) {
3836  logw.resize(w.size());
3837  for (size_t i = 0; i < w.size(); i++) logw[i] = log(w[i]);
3838  forceContiguous(logw);
3839  }
3840  return logw[0];
3841 }
3842 
3844  std::vector<Index> random,
3845  std::vector<sr_grid> grid,
3846  std::vector<Index> random2grid,
3847  bool perm)
3848  : grid(grid),
3849  glob(glob),
3850  random(random),
3851  replay(glob, new_glob),
3852  tinfo(glob, false) {
3853  inv2grid.resize(glob.inv_index.size(), 0);
3854  for (size_t i = 0; i < random2grid.size(); i++) {
3855  inv2grid[random[i]] = random2grid[i];
3856  }
3857 
3858  mark.resize(glob.values.size(), false);
3859  for (size_t i = 0; i < random.size(); i++)
3860  mark[glob.inv_index[random[i]]] = true;
3861  glob.forward(mark);
3862 
3863  forward_graph = glob.forward_graph(mark);
3864  reverse_graph = glob.reverse_graph(mark);
3865 
3866  glob.subgraph_cache_ptr();
3867 
3868  var_remap.resize(glob.values.size());
3869 
3870  op2inv_idx = glob.op2idx(glob.inv_index, NA);
3871  op2dep_idx = glob.op2idx(glob.dep_index, NA);
3872 
3873  if (perm) reorder_random();
3874 
3875  terms_done.resize(glob.dep_index.size(), false);
3876 
3877  std::vector<Index> inv_remap(glob.inv_index.size());
3878  for (size_t i = 0; i < inv_remap.size(); i++) inv_remap[i] = -(i + 1);
3879  for (size_t i = 0; i < random.size(); i++)
3880  inv_remap[random[i]] = inv2grid[random[i]];
3881  inv_remap = radix::factor<Index>(inv_remap);
3882  tinfo.initialize(inv_remap);
3883 }
3884 
3886  std::vector<IndexPair> edges;
3887  std::vector<Index> &inv2op = forward_graph.inv2op;
3888 
3889  for (size_t i = 0; i < random.size(); i++) {
3890  std::vector<Index> subgraph(1, inv2op[random[i]]);
3891  forward_graph.search(subgraph);
3892  reverse_graph.search(subgraph);
3893  for (size_t l = 0; l < subgraph.size(); l++) {
3894  Index inv_other = op2inv_idx[subgraph[l]];
3895  if (inv_other != NA) {
3896  IndexPair edge(random[i], inv_other);
3897  edges.push_back(edge);
3898  }
3899  }
3900  }
3901 
3902  size_t num_nodes = glob.inv_index.size();
3903  graph G(num_nodes, edges);
3904 
3905  std::vector<bool> visited(num_nodes, false);
3906  std::vector<Index> subgraph;
3907  for (size_t i = 0; i < random.size(); i++) {
3908  if (visited[random[i]]) continue;
3909  std::vector<Index> sg(1, random[i]);
3910  G.search(sg, visited, false, false);
3911  subgraph.insert(subgraph.end(), sg.begin(), sg.end());
3912  }
3913  std::reverse(subgraph.begin(), subgraph.end());
3914  TMBAD_ASSERT(random.size() == subgraph.size());
3915  random = subgraph;
3916 }
3917 
3918 std::vector<size_t> sequential_reduction::get_grid_bounds(
3919  std::vector<Index> inv_index) {
3920  std::vector<size_t> ans(inv_index.size());
3921  for (size_t i = 0; i < inv_index.size(); i++) {
3922  ans[i] = grid[inv2grid[inv_index[i]]].size();
3923  }
3924  return ans;
3925 }
3926 
3927 std::vector<sr_grid *> sequential_reduction::get_grid(
3928  std::vector<Index> inv_index) {
3929  std::vector<sr_grid *> ans(inv_index.size());
3930  for (size_t i = 0; i < inv_index.size(); i++) {
3931  ans[i] = &(grid[inv2grid[inv_index[i]]]);
3932  }
3933  return ans;
3934 }
3935 
3936 std::vector<ad_aug> sequential_reduction::tabulate(std::vector<Index> inv_index,
3937  Index dep_index) {
3938  size_t id = tinfo.id[dep_index];
3939  size_t count = tinfo.count[id];
3940  bool do_cache = (count >= 2);
3941  if (do_cache) {
3942  if (cache[id].size() > 0) {
3943  return cache[id];
3944  }
3945  }
3946 
3947  std::vector<sr_grid *> inv_grid = get_grid(inv_index);
3948  std::vector<size_t> grid_bounds = get_grid_bounds(inv_index);
3949  multivariate_index mv(grid_bounds);
3950  std::vector<ad_aug> ans(mv.count());
3951  for (size_t i = 0; i < ans.size(); i++, ++mv) {
3952  for (size_t j = 0; j < inv_index.size(); j++) {
3953  replay.value_inv(inv_index[j]) = inv_grid[j]->x[mv.index(j)];
3954  }
3955  replay.forward_sub();
3956  ans[i] = replay.value_dep(dep_index);
3957  }
3958 
3959  forceContiguous(ans);
3960  if (do_cache) {
3961  cache[id] = ans;
3962  }
3963  return ans;
3964 }
3965 
3967  std::vector<Index> super;
3968  size_t c = 0;
3969  for (std::list<clique>::iterator it = cliques.begin(); it != cliques.end();
3970  ++it) {
3971  if ((*it).contains(i)) {
3972  super.insert(super.end(), (*it).indices.begin(), (*it).indices.end());
3973  c++;
3974  }
3975  }
3976  sort_unique_inplace(super);
3977 
3978  std::vector<std::vector<ad_plain> > offset_by_clique(c);
3979  std::vector<Index> stride_by_clique(c);
3980  clique C;
3981  C.indices = super;
3982  C.dim = get_grid_bounds(super);
3983  std::list<clique>::iterator it = cliques.begin();
3984  c = 0;
3985  while (it != cliques.end()) {
3986  if ((*it).contains(i)) {
3987  (*it).get_stride(C, i, offset_by_clique[c], stride_by_clique[c]);
3988  it = cliques.erase(it);
3989  c++;
3990  } else {
3991  ++it;
3992  }
3993  }
3994 
3995  std::vector<bool> mask = lmatch(super, std::vector<Index>(1, i));
3996  mask.flip();
3997  C.subset_inplace(mask);
3998  C.logsum_init();
3999 
4000  grid[inv2grid[i]].logw_offset();
4001  size_t v_begin = get_glob()->values.size();
4002  for (size_t j = 0; j < C.logsum.size(); j++) {
4003  std::vector<ad_plain> x;
4004  std::vector<Index> stride;
4005  for (size_t k = 0; k < offset_by_clique.size(); k++) {
4006  x.push_back(offset_by_clique[k][j]);
4007  stride.push_back(stride_by_clique[k]);
4008  }
4009 
4010  x.push_back(grid[inv2grid[i]].logw_offset());
4011  stride.push_back(1);
4012  C.logsum[j] = logspace_sum_stride(x, stride, grid[inv2grid[i]].size());
4013  }
4014  size_t v_end = get_glob()->values.size();
4015  TMBAD_ASSERT(v_end - v_begin == C.logsum.size());
4016 
4017  cliques.push_back(C);
4018 }
4019 
4021  const std::vector<Index> &inv2op = forward_graph.inv2op;
4022 
4023  Index start_node = inv2op[i];
4024  std::vector<Index> subgraph(1, start_node);
4025  forward_graph.search(subgraph);
4026 
4027  std::vector<Index> dep_clique;
4028  std::vector<Index> subgraph_terms;
4029  for (size_t k = 0; k < subgraph.size(); k++) {
4030  Index node = subgraph[k];
4031  Index dep_idx = op2dep_idx[node];
4032  if (dep_idx != NA && !terms_done[dep_idx]) {
4033  terms_done[dep_idx] = true;
4034  subgraph_terms.push_back(node);
4035  dep_clique.push_back(dep_idx);
4036  }
4037  }
4038  for (size_t k = 0; k < subgraph_terms.size(); k++) {
4039  subgraph.resize(0);
4040  subgraph.push_back(subgraph_terms[k]);
4041 
4042  reverse_graph.search(subgraph);
4043 
4044  std::vector<Index> inv_clique;
4045  for (size_t l = 0; l < subgraph.size(); l++) {
4046  Index tmp = op2inv_idx[subgraph[l]];
4047  if (tmp != NA) inv_clique.push_back(tmp);
4048  }
4049 
4050  glob.subgraph_seq = subgraph;
4051 
4052  clique C;
4053  C.indices = inv_clique;
4054  C.dim = get_grid_bounds(inv_clique);
4055  C.logsum = tabulate(inv_clique, dep_clique[k]);
4056 
4057  cliques.push_back(C);
4058  }
4059 
4060  merge(i);
4061 }
4062 
4063 void sequential_reduction::show_cliques() {
4064  Rcout << "Cliques: ";
4065  std::list<clique>::iterator it;
4066  for (it = cliques.begin(); it != cliques.end(); ++it) {
4067  Rcout << it->indices << " ";
4068  }
4069  Rcout << "\n";
4070 }
4071 
4072 void sequential_reduction::update_all() {
4073  for (size_t i = 0; i < random.size(); i++) update(random[i]);
4074 }
4075 
4076 ad_aug sequential_reduction::get_result() {
4077  ad_aug ans = 0;
4078  std::list<clique>::iterator it;
4079  for (it = cliques.begin(); it != cliques.end(); ++it) {
4080  TMBAD_ASSERT(it->clique_size() == 0);
4081  TMBAD_ASSERT(it->logsum.size() == 1);
4082  ans += it->logsum[0];
4083  }
4084 
4085  for (size_t i = 0; i < terms_done.size(); i++) {
4086  if (!terms_done[i]) ans += replay.value_dep(i);
4087  }
4088  return ans;
4089 }
4090 
4091 global sequential_reduction::marginal() {
4092  replay.start();
4093  replay.forward(true, false);
4094  update_all();
4095  ad_aug ans = get_result();
4096  ans.Dependent();
4097  replay.stop();
4098  return new_glob;
4099 }
4100 
4101 autopar::autopar(global &glob, size_t num_threads)
4102  : glob(glob),
4103  num_threads(num_threads),
4104  do_aggregate(false),
4105  keep_all_inv(false) {
4106  reverse_graph = glob.reverse_graph();
4107 }
4108 
4109 std::vector<size_t> autopar::max_tree_depth() {
4110  std::vector<Index> max_tree_depth(glob.opstack.size(), 0);
4111  Dependencies dep;
4112  Args<> args(glob.inputs);
4113  for (size_t i = 0; i < glob.opstack.size(); i++) {
4114  dep.resize(0);
4115  glob.opstack[i]->dependencies(args, dep);
4116  for (size_t j = 0; j < dep.size(); j++) {
4117  max_tree_depth[i] = std::max(max_tree_depth[i], max_tree_depth[dep[j]]);
4118  }
4119 
4120  max_tree_depth[i]++;
4121 
4122  glob.opstack[i]->increment(args.ptr);
4123  }
4124  std::vector<size_t> ans(glob.dep_index.size());
4125  for (size_t j = 0; j < glob.dep_index.size(); j++) {
4126  ans[j] = max_tree_depth[glob.dep_index[j]];
4127  }
4128  return ans;
4129 }
4130 
4131 void autopar::run() {
4132  std::vector<size_t> ord = order(max_tree_depth());
4133  std::reverse(ord.begin(), ord.end());
4134  std::vector<bool> visited(glob.opstack.size(), false);
4135  std::vector<Index> start;
4136  std::vector<Index> dWork(ord.size());
4137  for (size_t i = 0; i < ord.size(); i++) {
4138  start.resize(1);
4139  start[0] = reverse_graph.dep2op[ord[i]];
4140  reverse_graph.search(start, visited, false, false);
4141  dWork[i] = start.size();
4142  if (false) {
4143  for (size_t k = 0; k < start.size(); k++) {
4144  Rcout << glob.opstack[start[k]]->op_name() << " ";
4145  }
4146  Rcout << "\n";
4147  }
4148  }
4149 
4150  std::vector<size_t> thread_assign(ord.size(), 0);
4151  std::vector<size_t> work_by_thread(num_threads, 0);
4152  for (size_t i = 0; i < dWork.size(); i++) {
4153  if (i == 0) {
4154  thread_assign[i] = 0;
4155  } else {
4156  if (dWork[i] <= 1)
4157  thread_assign[i] = thread_assign[i - 1];
4158  else
4159  thread_assign[i] = which_min(work_by_thread);
4160  }
4161  work_by_thread[thread_assign[i]] += dWork[i];
4162  }
4163 
4164  node_split.resize(num_threads);
4165  for (size_t i = 0; i < ord.size(); i++) {
4166  node_split[thread_assign[i]].push_back(reverse_graph.dep2op[ord[i]]);
4167  }
4168 
4169  for (size_t i = 0; i < num_threads; i++) {
4170  if (keep_all_inv)
4171  node_split[i].insert(node_split[i].begin(), reverse_graph.inv2op.begin(),
4172  reverse_graph.inv2op.end());
4173  reverse_graph.search(node_split[i]);
4174  }
4175 }
4176 
4178  vglob.resize(num_threads);
4179  inv_idx.resize(num_threads);
4180  dep_idx.resize(num_threads);
4181  std::vector<Index> tmp;
4182  for (size_t i = 0; i < num_threads; i++) {
4183  glob.subgraph_seq = node_split[i];
4184  vglob[i] = glob.extract_sub(tmp);
4185  if (do_aggregate) aggregate(vglob[i]);
4186  }
4187 
4188  Index NA = -1;
4189  std::vector<Index> op2inv_idx = glob.op2idx(glob.inv_index, NA);
4190  std::vector<Index> op2dep_idx = glob.op2idx(glob.dep_index, NA);
4191  for (size_t i = 0; i < num_threads; i++) {
4192  std::vector<Index> &seq = node_split[i];
4193  for (size_t j = 0; j < seq.size(); j++) {
4194  if (op2inv_idx[seq[j]] != NA) inv_idx[i].push_back(op2inv_idx[seq[j]]);
4195  if (op2dep_idx[seq[j]] != NA) dep_idx[i].push_back(op2dep_idx[seq[j]]);
4196  }
4197  if (do_aggregate) {
4198  dep_idx[i].resize(1);
4199  dep_idx[i][0] = i;
4200  }
4201  }
4202 }
4203 
4204 size_t autopar::input_size() const { return glob.inv_index.size(); }
4205 
4206 size_t autopar::output_size() const {
4207  return (do_aggregate ? num_threads : glob.dep_index.size());
4208 }
4209 
4210 Index ParalOp::input_size() const { return n; }
4211 
4212 Index ParalOp::output_size() const { return m; }
4213 
4214 ParalOp::ParalOp(const autopar &ap)
4215  : vglob(ap.vglob),
4216  inv_idx(ap.inv_idx),
4217  dep_idx(ap.dep_idx),
4218  n(ap.input_size()),
4219  m(ap.output_size()) {}
4220 
4221 void ParalOp::forward(ForwardArgs<Scalar> &args) {
4222  size_t num_threads = vglob.size();
4223 
4224 #ifdef _OPENMP
4225 #pragma omp parallel for
4226 #endif
4227 
4228  for (size_t i = 0; i < num_threads; i++) {
4229  for (size_t j = 0; j < inv_idx[i].size(); j++) {
4230  vglob[i].value_inv(j) = args.x(inv_idx[i][j]);
4231  }
4232  vglob[i].forward();
4233  }
4234 
4235  for (size_t i = 0; i < num_threads; i++) {
4236  for (size_t j = 0; j < dep_idx[i].size(); j++) {
4237  args.y(dep_idx[i][j]) = vglob[i].value_dep(j);
4238  }
4239  }
4240 }
4241 
4242 void ParalOp::reverse(ReverseArgs<Scalar> &args) {
4243  size_t num_threads = vglob.size();
4244 
4245 #ifdef _OPENMP
4246 #pragma omp parallel for
4247 #endif
4248 
4249  for (size_t i = 0; i < num_threads; i++) {
4250  vglob[i].clear_deriv();
4251  for (size_t j = 0; j < dep_idx[i].size(); j++) {
4252  vglob[i].deriv_dep(j) = args.dy(dep_idx[i][j]);
4253  }
4254  vglob[i].reverse();
4255  }
4256 
4257  for (size_t i = 0; i < num_threads; i++) {
4258  for (size_t j = 0; j < inv_idx[i].size(); j++) {
4259  args.dx(inv_idx[i][j]) += vglob[i].deriv_inv(j);
4260  }
4261  }
4262 }
4263 
4264 const char *ParalOp::op_name() { return "ParalOp"; }
4265 
4266 void ParalOp::print(global::print_config cfg) {
4267  size_t num_threads = vglob.size();
4268  for (size_t i = 0; i < num_threads; i++) {
4269  global::print_config cfg2 = cfg;
4270  std::stringstream ss;
4271  ss << i;
4272  std::string str = ss.str();
4273  cfg2.prefix = cfg2.prefix + str;
4274  vglob[i].print(cfg2);
4275  }
4276 }
4277 
4278 std::vector<Index> get_likely_expression_duplicates(
4279  const global &glob, std::vector<Index> inv_remap) {
4280  global::hash_config cfg;
4281  cfg.strong_inv = true;
4282  cfg.strong_const = true;
4283  cfg.strong_output = true;
4284  cfg.reduce = false;
4285  cfg.deterministic = false;
4286  cfg.inv_seed = inv_remap;
4287  std::vector<hash_t> h = glob.hash_sweep(cfg);
4288  return radix::first_occurance<Index>(h);
4289 }
4290 
4291 bool all_allow_remap(const global &glob) {
4292  Args<> args(glob.inputs);
4293  for (size_t i = 0; i < glob.opstack.size(); i++) {
4294  op_info info = glob.opstack[i]->info();
4295  if (!info.test(op_info::allow_remap)) {
4296  return false;
4297  }
4298  glob.opstack[i]->increment(args.ptr);
4299  }
4300  return true;
4301 }
4302 
4304  global &glob, std::vector<Index> inv_remap) {
4305  std::vector<Index> remap = get_likely_expression_duplicates(glob, inv_remap);
4306 
4307  for (size_t i = 0; i < glob.inv_index.size(); i++) {
4308  bool accept = false;
4309  Index var_i = glob.inv_index[i];
4310  if (inv_remap.size() > 0) {
4311  Index j = inv_remap[i];
4312  Index var_j = glob.inv_index[j];
4313  accept = remap[var_i] == remap[var_j];
4314  }
4315  if (!accept) remap[var_i] = var_i;
4316  }
4317 
4318  std::vector<Index> v2o = glob.var2op();
4319  std::vector<Index> dep;
4320  global::OperatorPure *invop = glob.getOperator<global::InvOp>();
4321  Dependencies dep1;
4322  Dependencies dep2;
4323  size_t reject = 0;
4324  size_t total = 0;
4325  Args<> args(glob.inputs);
4326 
4327  for (size_t j = 0, i = 0, nout = 0; j < glob.opstack.size(); j++, i += nout) {
4328  nout = glob.opstack[j]->output_size();
4329  bool any_remap = false;
4330  for (size_t k = i; k < i + nout; k++) {
4331  if (remap[k] != k) {
4332  any_remap = true;
4333  break;
4334  }
4335  }
4336  if (any_remap) {
4337  bool ok = true;
4338  total += nout;
4339 
4340  global::OperatorPure *CurOp = glob.opstack[v2o[i]];
4341  global::OperatorPure *RemOp = glob.opstack[v2o[remap[i]]];
4342  ok &= (CurOp->identifier() == RemOp->identifier());
4343 
4344  ok &= (CurOp->input_size() == RemOp->input_size());
4345  ok &= (CurOp->output_size() == RemOp->output_size());
4346 
4347  op_info CurInfo = CurOp->info();
4348 
4349  if (ok && (nout > 1)) {
4350  for (size_t k = 1; k < nout; k++) {
4351  ok &= (remap[i + k] < i);
4352 
4353  ok &= (v2o[remap[i + k]] == v2o[remap[i]]);
4354 
4355  ok &= (remap[i + k] == remap[i] + k);
4356  }
4357  }
4358 
4359  if (CurOp == invop) {
4360  ok = false;
4361  }
4362  if (ok) {
4363  if (CurInfo.test(op_info::is_constant)) {
4364  if (glob.values[i] != glob.values[remap[i]]) {
4365  ok = false;
4366  }
4367  }
4368  }
4369 
4370  if (ok) {
4371  glob.subgraph_cache_ptr();
4372 
4373  args.ptr = glob.subgraph_ptr[v2o[i]];
4374  dep1.resize(0);
4375  glob.opstack[v2o[i]]->dependencies(args, dep1);
4376 
4377  args.ptr = glob.subgraph_ptr[v2o[remap[i]]];
4378  dep2.resize(0);
4379  glob.opstack[v2o[remap[i]]]->dependencies(args, dep2);
4380 
4381  ok = (dep1.size() == dep2.size());
4382  if (ok) {
4383  bool all_equal = true;
4384  for (size_t j = 0; j < dep1.size(); j++) {
4385  all_equal &= (remap[dep1[j]] == remap[dep2[j]]);
4386  }
4387  ok = all_equal;
4388  }
4389  }
4390 
4391  if (!ok) {
4392  reject += nout;
4393  for (size_t k = i; k < i + nout; k++) remap[k] = k;
4394  }
4395  }
4396  }
4397 
4398  for (size_t i = 0; i < remap.size(); i++) {
4399  TMBAD_ASSERT(remap[i] <= i);
4400  TMBAD_ASSERT(remap[remap[i]] == remap[i]);
4401  }
4402 
4403  if (true) {
4404  Args<> args(glob.inputs);
4405  intervals<Index> visited;
4406  for (size_t i = 0; i < glob.opstack.size(); i++) {
4407  op_info info = glob.opstack[i]->info();
4408  if (!info.test(op_info::allow_remap)) {
4409  Dependencies dep;
4410  glob.opstack[i]->dependencies(args, dep);
4411  for (size_t j = 0; j < dep.I.size(); j++) {
4412  visited.insert(dep.I[j].first, dep.I[j].second);
4413  }
4414  }
4415  glob.opstack[i]->increment(args.ptr);
4416  }
4417 
4418  forbid_remap<std::vector<Index> > fb(remap);
4419  visited.apply(fb);
4420  }
4421  if (reject > 0) {
4422  ((void)(total));
4423  }
4424 
4425  return remap;
4426 }
4427 
4429  std::vector<Index> inv_remap(0);
4430  std::vector<Index> remap = remap_identical_sub_expressions(glob, inv_remap);
4431 
4432  for (size_t i = 0; i < glob.inputs.size(); i++) {
4433  glob.inputs[i] = remap[glob.inputs[i]];
4434  }
4435 }
4436 
4437 std::vector<Position> inv_positions(global &glob) {
4438  IndexPair ptr(0, 0);
4439  std::vector<bool> independent_variable = glob.inv_marks();
4440  std::vector<Position> ans(glob.inv_index.size());
4441  size_t k = 0;
4442  for (size_t i = 0; i < glob.opstack.size(); i++) {
4443  Index nout = glob.opstack[i]->output_size();
4444  for (Index j = 0; j < nout; j++) {
4445  if (independent_variable[ptr.second + j]) {
4446  ans[k].node = i;
4447  ans[k].ptr = ptr;
4448  k++;
4449  }
4450  }
4451  glob.opstack[i]->increment(ptr);
4452  }
4453  return ans;
4454 }
4455 
4456 void reorder_graph(global &glob, std::vector<Index> inv_idx) {
4457  if (!all_allow_remap(glob)) return;
4458  for (size_t i = 1; i < inv_idx.size(); i++) {
4459  TMBAD_ASSERT(inv_idx[i] > inv_idx[i - 1]);
4460  }
4461  std::vector<bool> marks(glob.values.size(), false);
4462  for (size_t i = 0; i < inv_idx.size(); i++)
4463  marks[glob.inv_index[inv_idx[i]]] = true;
4464  glob.forward_dense(marks);
4465  if (false) {
4466  int c = std::count(marks.begin(), marks.end(), true);
4467  Rcout << "marked proportion:" << (double)c / (double)marks.size() << "\n";
4468  }
4469 
4470  marks.flip();
4471  glob.set_subgraph(marks);
4472  marks.flip();
4473  glob.set_subgraph(marks, true);
4474  glob = glob.extract_sub();
4475 }
4476 } // namespace TMBad
4477 // Autogenerated - do not edit by hand !
4478 #include "integrate.hpp"
4479 namespace TMBad {
4480 
/** Identity overload: the value of a plain `double` is the number itself. */
double value(double x) {
  return x;
}
4482 
4483 control::control(int subdivisions_, double reltol_, double abstol_)
4484  : subdivisions(subdivisions_), reltol(reltol_), abstol(abstol_) {}
4485 } // namespace TMBad
4486 // Autogenerated - do not edit by hand !
4487 #include "radix.hpp"
4488 namespace TMBad {}
4489 // Autogenerated - do not edit by hand !
4490 #include "tmbad_allow_comparison.hpp"
4491 namespace TMBad {
4492 
4493 bool operator<(const ad_aug &x, const ad_aug &y) {
4494  return x.Value() < y.Value();
4495 }
4496 bool operator<(const Scalar &x, const ad_aug &y) { return x < y.Value(); }
4497 
4498 bool operator<=(const ad_aug &x, const ad_aug &y) {
4499  return x.Value() <= y.Value();
4500 }
4501 bool operator<=(const Scalar &x, const ad_aug &y) { return x <= y.Value(); }
4502 
4503 bool operator>(const ad_aug &x, const ad_aug &y) {
4504  return x.Value() > y.Value();
4505 }
4506 bool operator>(const Scalar &x, const ad_aug &y) { return x > y.Value(); }
4507 
4508 bool operator>=(const ad_aug &x, const ad_aug &y) {
4509  return x.Value() >= y.Value();
4510 }
4511 bool operator>=(const Scalar &x, const ad_aug &y) { return x >= y.Value(); }
4512 
4513 bool operator==(const ad_aug &x, const ad_aug &y) {
4514  return x.Value() == y.Value();
4515 }
4516 bool operator==(const Scalar &x, const ad_aug &y) { return x == y.Value(); }
4517 
4518 bool operator!=(const ad_aug &x, const ad_aug &y) {
4519  return x.Value() != y.Value();
4520 }
4521 bool operator!=(const Scalar &x, const ad_aug &y) { return x != y.Value(); }
4522 } // namespace TMBad
4523 // Autogenerated - do not edit by hand !
4524 #include "vectorize.hpp"
4525 namespace TMBad {
4526 
4527 VSumOp::VSumOp(size_t n) : n(n) {}
4528 
4529 void VSumOp::dependencies(Args<> &args, Dependencies &dep) const {
4530  dep.add_segment(args.input(0), n);
4531 }
4532 
4533 void VSumOp::forward(ForwardArgs<Writer> &args) { TMBAD_ASSERT(false); }
4534 
4535 void VSumOp::reverse(ReverseArgs<Writer> &args) { TMBAD_ASSERT(false); }
4536 
4537 const char *VSumOp::op_name() { return "VSumOp"; }
4538 
4539 ad_aug sum(ad_segment x) {
4540  global::Complete<VSumOp> F(x.size());
4541  return F(x)[0];
4542 }
4543 
4544 Scalar *SegmentRef::value_ptr() { return (*glob_ptr).values.data() + offset; }
4545 
4546 Scalar *SegmentRef::deriv_ptr() { return (*glob_ptr).derivs.data() + offset; }
4547 
4548 SegmentRef::SegmentRef() {}
4549 
4550 SegmentRef::SegmentRef(const Scalar *x) {
4551  SegmentRef *sx = (SegmentRef *)x;
4552  *this = *sx;
4553 }
4554 
4555 SegmentRef::SegmentRef(global *g, Index o, Index s)
4556  : glob_ptr(g), offset(o), size(s) {}
4557 
4558 SegmentRef::SegmentRef(const ad_segment &x) {
4559  static const size_t K = ScalarPack<SegmentRef>::size;
4560  TMBAD_ASSERT(x.size() == K);
4561  Scalar buf[K];
4562  for (size_t i = 0; i < K; i++) buf[i] = x[i].Value();
4563  SegmentRef *sx = (SegmentRef *)buf;
4564  *this = *sx;
4565 }
4566 
4567 bool SegmentRef::isNull() { return (glob_ptr == NULL); }
4568 
4569 void SegmentRef::resize(ad_segment &pack, Index n) {
4570  Index i = pack.index();
4571  SegmentRef *p = (SegmentRef *)(get_glob()->values.data() + i);
4572  p->size = n;
4573 }
4574 
4575 PackOp::PackOp(const Index n) : n(n) {}
4576 
4578  SegmentRef *y = (SegmentRef *)args.y_ptr(0);
4579  y[0] = SegmentRef(args.glob_ptr, args.input(0), n);
4580 }
4581 
4583  ad_segment x(args.x_ptr(0), n);
4584  args.y_segment(0, K) = pack(x);
4585 }
4586 
4588  SegmentRef tmp(args.dy_ptr(0));
4589  if (tmp.glob_ptr != NULL) {
4590  Scalar *dx = SegmentRef(args.y_ptr(0)).deriv_ptr();
4591  Scalar *dy = SegmentRef(args.dy_ptr(0)).deriv_ptr();
4592  for (Index i = 0; i < n; i++) dx[i] += dy[i];
4593  }
4594 }
4595 
4597  ad_segment dy_packed(args.dy_ptr(0), K);
4598 
4599  if (SegmentRef(dy_packed).isNull()) {
4600  SegmentRef().resize(dy_packed, n);
4601  }
4602  ad_segment dy = unpack(dy_packed);
4603  ad_segment dx(args.dx_ptr(0), n, true);
4604  dx += dy;
4605  Replay *pdx = args.dx_ptr(0);
4606  for (Index i = 0; i < n; i++) pdx[i] = dx[i];
4607 }
4608 
4609 const char *PackOp::op_name() { return "PackOp"; }
4610 
4611 void PackOp::dependencies(Args<> &args, Dependencies &dep) const {
4612  dep.add_segment(args.input(0), n);
4613 }
4614 
4615 UnpkOp::UnpkOp(const Index n) : noutput(n) {}
4616 
4618  Scalar *y = args.y_ptr(0);
4619  SegmentRef srx(args.x_ptr(0));
4620  if (srx.isNull()) {
4621  for (Index i = 0; i < noutput; i++) y[i] = 0;
4622  return;
4623  }
4624  Scalar *x = srx.value_ptr();
4625  for (Index i = 0; i < noutput; i++) y[i] = x[i];
4626 
4627  ((SegmentRef *)args.x_ptr(0))->glob_ptr = NULL;
4628 }
4629 
4631  SegmentRef *dx = (SegmentRef *)args.dx_ptr(0);
4632  dx[0] = SegmentRef(args.glob_ptr, args.output(0), noutput);
4633 }
4634 
4636  ad_segment dy(args.dy_ptr(0), noutput);
4637  ad_segment dy_packed = pack(dy);
4638  Replay *pdx = args.dx_ptr(0);
4639  for (Index i = 0; i < dy_packed.size(); i++) pdx[i] = dy_packed[i];
4640 }
4641 
4642 const char *UnpkOp::op_name() { return "UnpkOp"; }
4643 
4644 void UnpkOp::dependencies(Args<> &args, Dependencies &dep) const {
4645  dep.add_segment(args.input(0), K);
4646 }
4647 
4649  global::Complete<PackOp> F(x.size());
4650  return F(x);
4651 }
4652 
4654  Index n = SegmentRef(x).size;
4656  return op(x);
4657 }
4658 
4659 Scalar *unpack(const std::vector<Scalar> &x, Index j) {
4660  Index K = ScalarPack<SegmentRef>::size;
4661  SegmentRef sr(&(x[j * K]));
4662  return sr.value_ptr();
4663 }
4664 
4665 std::vector<ad_aug> concat(const std::vector<ad_segment> &x) {
4666  std::vector<ad_aug> ans;
4667  for (size_t i = 0; i < x.size(); i++) {
4668  ad_segment xi = x[i];
4669  for (size_t j = 0; j < xi.size(); j++) {
4670  ans.push_back(xi[j]);
4671  }
4672  }
4673  return ans;
4674 }
4675 } // namespace TMBad
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+
1 // Autogenerated - do not edit by hand !
2 #include "TMBad.hpp"
3 namespace TMBad {
4 
5 SpJacFun_config::SpJacFun_config() : compress(false), index_remap(true) {}
6 } // namespace TMBad
7 // Autogenerated - do not edit by hand !
8 #include "ad_blas.hpp"
9 namespace TMBad {
10 
11 vmatrix matmul(const vmatrix &x, const vmatrix &y) {
12  vmatrix z(x.rows(), y.cols());
13  Map<vmatrix> zm(&z(0), z.rows(), z.cols());
14  matmul<false, false, false, false>(x, y, zm);
15  return z;
16 }
17 
18 dmatrix matmul(const dmatrix &x, const dmatrix &y) { return x * y; }
19 } // namespace TMBad
20 // Autogenerated - do not edit by hand !
21 #include "checkpoint.hpp"
22 namespace TMBad {
23 
24 bool ParametersChanged::operator()(const std::vector<Scalar> &x) {
25  bool change = (x != x_prev);
26  if (change) {
27  x_prev = x;
28  }
29  return change;
30 }
31 } // namespace TMBad
32 // Autogenerated - do not edit by hand !
33 #include "code_generator.hpp"
34 namespace TMBad {
35 
/**
 * Replace every occurrence of `oldStr` in `str` with `newStr`, in place.
 *
 * The scan resumes after each inserted replacement, so a `newStr` that
 * contains `oldStr` is not expanded again.
 *
 * @param str     String to modify.
 * @param oldStr  Text to search for. If empty, `str` is left unchanged.
 * @param newStr  Replacement text.
 */
void searchReplace(std::string &str, const std::string &oldStr,
                   const std::string &newStr) {
  // An empty pattern matches at every position, so the loop below would
  // never terminate (it would insert newStr forever).
  if (oldStr.empty()) return;
  std::string::size_type pos = 0u;
  while ((pos = str.find(oldStr, pos)) != std::string::npos) {
    str.replace(pos, oldStr.length(), newStr);
    pos += newStr.length();
  }
}
44 
45 std::string code_config::float_ptr() { return float_str + (gpu ? "**" : "*"); }
46 
47 std::string code_config::void_str() {
48  return (gpu ? "__device__ void" : "extern \"C\" void");
49 }
50 
51 void code_config::init_code() {
52  if (gpu) {
53  *cout << indent << "int idx = threadIdx.x;" << std::endl;
54  }
55 }
56 
57 void code_config::write_header_comment() {
58  if (header_comment.length() > 0) *cout << header_comment << std::endl;
59 }
60 
61 code_config::code_config()
62  : asm_comments(true),
63  gpu(true),
64  indent(" "),
65  header_comment("// Autogenerated - do not edit by hand !"),
66  float_str(xstringify(TMBAD_SCALAR_TYPE)),
67  cout(&Rcout) {}
68 
69 void write_common(std::ostringstream &buffer, code_config cfg, size_t node) {
70  std::ostream &cout = *cfg.cout;
71  using std::endl;
72  using std::left;
73  using std::setw;
74  std::string indent = cfg.indent;
75  if (cfg.asm_comments)
76  cout << indent << "asm(\"// Node: " << node << "\");" << endl;
77  bool empty_buffer = (buffer.tellp() == 0);
78  if (!empty_buffer) {
79  std::string str = buffer.str();
80  if (cfg.gpu) {
81  std::string pattern = "]";
82  std::string replace = "][idx]";
83  searchReplace(str, pattern, replace);
84  }
85  searchReplace(str, ";v", "; v");
86  searchReplace(str, ";d", "; d");
87  cout << indent << str << endl;
88  }
89 }
90 
91 void write_forward(global &glob, code_config cfg) {
92  using std::endl;
93  using std::left;
94  using std::setw;
95  std::ostream &cout = *cfg.cout;
96  cfg.write_header_comment();
97  cout << cfg.void_str() << " forward(" << cfg.float_ptr() << " v) {" << endl;
98  cfg.init_code();
99  ForwardArgs<Writer> args(glob.inputs, glob.values);
100  for (size_t i = 0; i < glob.opstack.size(); i++) {
101  std::ostringstream buffer;
102  Writer::cout = &buffer;
103  glob.opstack[i]->forward(args);
104  write_common(buffer, cfg, i);
105  glob.opstack[i]->increment(args.ptr);
106  }
107  cout << "}" << endl;
108 }
109 
110 void write_reverse(global &glob, code_config cfg) {
111  using std::endl;
112  using std::left;
113  using std::setw;
114  std::ostream &cout = *cfg.cout;
115  cfg.write_header_comment();
116  cout << cfg.void_str() << " reverse(" << cfg.float_ptr() << " v, "
117  << cfg.float_ptr() << " d) {" << endl;
118  cfg.init_code();
119  ReverseArgs<Writer> args(glob.inputs, glob.values);
120  for (size_t i = glob.opstack.size(); i > 0;) {
121  i--;
122  glob.opstack[i]->decrement(args.ptr);
123  std::ostringstream buffer;
124  Writer::cout = &buffer;
125  glob.opstack[i]->reverse(args);
126  write_common(buffer, cfg, i);
127  }
128  cout << "}" << endl;
129 }
130 
131 void write_all(global glob, code_config cfg) {
132  using std::endl;
133  using std::left;
134  using std::setw;
135  std::ostream &cout = *cfg.cout;
136  cout << "#include \"global.hpp\"" << endl;
137  cout << "#include \"ad_blas.hpp\"" << endl;
138  write_forward(glob, cfg);
139  write_reverse(glob, cfg);
140  cout << "int main() {}" << endl;
141 }
142 } // namespace TMBad
143 #ifndef _WIN32
144 // Autogenerated - do not edit by hand !
145 #include "compile.hpp"
146 namespace TMBad {
147 
148 void compile(global &glob, code_config cfg) {
149  cfg.gpu = false;
150  cfg.asm_comments = false;
151  std::ofstream file;
152  file.open("tmp.cpp");
153  cfg.cout = &file;
154 
155  *cfg.cout << "#include <cmath>" << std::endl;
156  *cfg.cout
157  << "template<class T>T sign(const T &x) { return (x > 0) - (x < 0); }"
158  << std::endl;
159 
160  write_forward(glob, cfg);
161 
162  write_reverse(glob, cfg);
163 
164  int out = system("g++ -O3 -g tmp.cpp -o tmp.so -shared -fPIC");
165  if (out != 0) {
166  }
167 
168  void *handle = dlopen("./tmp.so", RTLD_NOW);
169  if (handle != NULL) {
170  Rcout << "Loading compiled code!" << std::endl;
171  glob.forward_compiled =
172  reinterpret_cast<void (*)(Scalar *)>(dlsym(handle, "forward"));
173  glob.reverse_compiled = reinterpret_cast<void (*)(Scalar *, Scalar *)>(
174  dlsym(handle, "reverse"));
175  }
176 }
177 } // namespace TMBad
178 #endif
179 // Autogenerated - do not edit by hand !
180 #include "compression.hpp"
181 namespace TMBad {
182 
183 std::ostream &operator<<(std::ostream &os, const period &x) {
184  os << "begin: " << x.begin;
185  os << " size: " << x.size;
186  os << " rep: " << x.rep;
187  return os;
188 }
189 
190 std::vector<period> split_period(global *glob, period p,
191  size_t max_period_size) {
192  typedef std::ptrdiff_t ptrdiff_t;
193  glob->subgraph_cache_ptr();
194 
195  size_t offset = glob->subgraph_ptr[p.begin].first;
196 
197  size_t nrow = 0;
198  for (size_t i = 0; i < p.size; i++) {
199  nrow += glob->opstack[p.begin + i]->input_size();
200  }
201 
202  size_t ncol = p.rep;
203 
204  matrix_view<Index> x(&(glob->inputs[offset]), nrow, ncol);
205 
206  std::vector<bool> marks(ncol - 1, false);
207 
208  for (size_t i = 0; i < nrow; i++) {
209  std::vector<period> pd =
210  periodic<ptrdiff_t>(x.row_diff<ptrdiff_t>(i), max_period_size)
211  .find_all();
212 
213  for (size_t j = 0; j < pd.size(); j++) {
214  if (pd[j].begin > 0) {
215  marks[pd[j].begin - 1] = true;
216  }
217  size_t end = pd[j].begin + pd[j].size * pd[j].rep;
218  if (end < marks.size()) marks[end] = true;
219  }
220  }
221 
222  std::vector<period> ans;
223  p.rep = 1;
224  ans.push_back(p);
225  for (size_t j = 0; j < marks.size(); j++) {
226  if (marks[j]) {
227  period pnew = p;
228  pnew.begin = p.begin + (j + 1) * p.size;
229  pnew.rep = 1;
230  ans.push_back(pnew);
231  } else {
232  ans.back().rep++;
233  }
234  }
235 
236  return ans;
237 }
238 
239 size_t compressed_input::input_size() const { return n; }
240 
241 void compressed_input::update_increment_pattern() const {
242  for (size_t i = 0; i < (size_t)np; i++)
243  increment_pattern[which_periodic[i]] =
244  period_data[period_offsets[i] + counter % period_sizes[i]];
245 }
246 
247 void compressed_input::increment(Args<> &args) const {
248  if (np) {
249  update_increment_pattern();
250  counter++;
251  }
252  for (size_t i = 0; i < n; i++) inputs[i] += increment_pattern[i];
253  args.ptr.first = 0;
254 }
255 
256 void compressed_input::decrement(Args<> &args) const {
257  args.ptr.first = input_size();
258  for (size_t i = 0; i < n; i++) inputs[i] -= increment_pattern[i];
259  if (np) {
260  counter--;
261  update_increment_pattern();
262  }
263 }
264 
265 void compressed_input::forward_init(Args<> &args) const {
266  counter = 0;
267  inputs.resize(input_size());
268  for (size_t i = 0; i < inputs.size(); i++) inputs[i] = args.input(i);
269  args.inputs = inputs.data();
270  args.ptr.first = 0;
271 }
272 
273 void compressed_input::reverse_init(Args<> &args) {
274  inputs.resize(input_size());
275  for (size_t i = 0; i < inputs.size(); i++)
276  inputs[i] = args.input(i) + input_diff[i];
277 
278  args.inputs = inputs.data();
279  args.ptr.first = 0;
280  args.ptr.second += m * nrep;
281  counter = nrep - 1;
282  update_increment_pattern();
283  args.ptr.first = input_size();
284 }
285 
286 void compressed_input::dependencies_intervals(Args<> &args,
287  std::vector<Index> &lower,
288  std::vector<Index> &upper) const {
289  forward_init(args);
290  lower = inputs;
291  upper = inputs;
292  for (size_t i = 0; i < nrep; i++) {
293  for (size_t j = 0; j < inputs.size(); j++) {
294  if (inputs[j] < lower[j]) lower[j] = inputs[j];
295  if (inputs[j] > upper[j]) upper[j] = inputs[j];
296  }
297  increment(args);
298  }
299 }
300 
301 bool compressed_input::test_period(std::vector<ptrdiff_t> &x, size_t p) {
302  for (size_t j = 0; j < x.size(); j++) {
303  if (x[j] != x[j % p]) return false;
304  }
305  return true;
306 }
307 
308 size_t compressed_input::find_shortest(std::vector<ptrdiff_t> &x) {
309  for (size_t p = 1; p < max_period_size; p++) {
310  if (test_period(x, p)) return p;
311  }
312  return x.size();
313 }
314 
315 compressed_input::compressed_input() {}
316 
317 compressed_input::compressed_input(std::vector<Index> &x, size_t offset,
318  size_t nrow, size_t m, size_t ncol,
319  size_t max_period_size)
320  : n(nrow), m(m), nrep(ncol), counter(0), max_period_size(max_period_size) {
321  matrix_view<Index> xm(&x[offset], nrow, ncol);
322 
323  for (size_t i = 0; i < nrow; i++) {
324  std::vector<ptrdiff_t> rd = xm.row_diff<ptrdiff_t>(i);
325 
326  size_t p = find_shortest(rd);
327 
328  increment_pattern.push_back(rd[0]);
329  if (p != 1) {
330  which_periodic.push_back(i);
331  period_sizes.push_back(p);
332 
333  size_t pos = std::search(period_data.begin(), period_data.end(),
334  rd.begin(), rd.begin() + p) -
335  period_data.begin();
336  if (pos < period_data.size()) {
337  period_offsets.push_back(pos);
338  } else {
339  period_offsets.push_back(period_data.size());
340  period_data.insert(period_data.end(), rd.begin(), rd.begin() + p);
341  }
342  }
343  }
344 
345  np = which_periodic.size();
346 
347  input_diff.resize(n, 0);
348  Args<> args(input_diff);
349  forward_init(args);
350  for (size_t i = 0; i < nrep; i++) {
351  increment(args);
352  }
353  input_diff = inputs;
354 }
355 
356 StackOp::StackOp(global *glob, period p, IndexPair ptr,
357  size_t max_period_size) {
358  opstack.resize(p.size);
359  size_t n = 0, m = 0;
360  for (size_t i = 0; i < p.size; i++) {
361  opstack[i] = glob->opstack[p.begin + i]->copy();
362  n += opstack[i]->input_size();
363  m += opstack[i]->output_size();
364  }
365  ci = compressed_input(glob->inputs, ptr.first, n, m, p.rep, max_period_size);
366 }
367 
368 StackOp::StackOp(const StackOp &x) : opstack(x.opstack), ci(x.ci) {}
369 
370 void StackOp::print(global::print_config cfg) {
371  std::vector<const char *> tmp(opstack.size());
372  for (size_t i = 0; i < opstack.size(); i++) tmp[i] = opstack[i]->op_name();
373  Rcout << cfg.prefix << " opstack = " << tmp << "\n";
374 
375  Rcout << cfg.prefix << " "
376  << "nrep"
377  << " = " << ci.nrep << "\n";
378  ;
379  Rcout << cfg.prefix << " "
380  << "increment_pattern"
381  << " = " << ci.increment_pattern << "\n";
382  ;
383  if (ci.which_periodic.size() > 0) {
384  Rcout << cfg.prefix << " "
385  << "which_periodic"
386  << " = " << ci.which_periodic << "\n";
387  ;
388  Rcout << cfg.prefix << " "
389  << "period_sizes"
390  << " = " << ci.period_sizes << "\n";
391  ;
392  Rcout << cfg.prefix << " "
393  << "period_offsets"
394  << " = " << ci.period_offsets << "\n";
395  ;
396  Rcout << cfg.prefix << " "
397  << "period_data"
398  << " = " << ci.period_data << "\n";
399  ;
400  }
401 
402  Rcout << "\n";
403 }
404 
405 Index StackOp::input_size() const { return ci.n; }
406 
407 Index StackOp::output_size() const { return ci.m * ci.nrep; }
408 
409 void StackOp::forward(ForwardArgs<Writer> &args) {
410  size_t n = ci.n, m = ci.m, nrep = ci.nrep;
411  std::vector<Index> inputs(n);
412  for (size_t i = 0; i < (size_t)n; i++) inputs[i] = args.input(i);
413  std::vector<Index> outputs(m);
414  for (size_t i = 0; i < (size_t)m; i++) outputs[i] = args.output(i);
415  Writer w;
416  size_t np = ci.which_periodic.size();
417  size_t sp = ci.period_data.size();
418  w << "for (int count = 0, ";
419  if (n > 0) {
420  w << "i[" << n << "]=" << inputs << ", "
421  << "ip[" << n << "]=" << ci.increment_pattern << ", ";
422  }
423  if (np > 0) {
424  w << "wp[" << np << "]=" << ci.which_periodic << ", "
425  << "ps[" << np << "]=" << ci.period_sizes << ", "
426  << "po[" << np << "]=" << ci.period_offsets << ", "
427  << "pd[" << sp << "]=" << ci.period_data << ", ";
428  }
429  w << "o[" << m << "]=" << outputs << "; "
430  << "count < " << nrep << "; count++) {\n";
431 
432  w << " ";
433  ForwardArgs<Writer> args_cpy = args;
434  args_cpy.set_indirect();
435  for (size_t k = 0; k < opstack.size(); k++) {
436  opstack[k]->forward_incr(args_cpy);
437  }
438  w << "\n";
439 
440  if (np > 0) {
441  w << " ";
442  for (size_t k = 0; k < np; k++)
443  w << "ip[wp[" << k << "]] = pd[po[" << k << "] + count % ps[" << k
444  << "]]; ";
445  w << "\n";
446  }
447  if (n > 0) {
448  w << " ";
449  for (size_t k = 0; k < n; k++) w << "i[" << k << "] += ip[" << k << "]; ";
450  w << "\n";
451  }
452  w << " ";
453  for (size_t k = 0; k < m; k++) w << "o[" << k << "] += " << m << "; ";
454  w << "\n";
455 
456  w << " ";
457  w << "}";
458 }
459 
460 void StackOp::reverse(ReverseArgs<Writer> &args) {
461  size_t n = ci.n, m = ci.m, nrep = ci.nrep;
462  std::vector<ptrdiff_t> inputs(input_size());
463  for (size_t i = 0; i < inputs.size(); i++) {
464  ptrdiff_t tmp;
465  if (-ci.input_diff[i] < ci.input_diff[i]) {
466  tmp = -((ptrdiff_t)-ci.input_diff[i]);
467  } else {
468  tmp = ci.input_diff[i];
469  }
470  inputs[i] = args.input(i) + tmp;
471  }
472  std::vector<Index> outputs(ci.m);
473  for (size_t i = 0; i < (size_t)ci.m; i++)
474  outputs[i] = args.output(i) + ci.m * ci.nrep;
475  Writer w;
476  size_t np = ci.which_periodic.size();
477  size_t sp = ci.period_data.size();
478  w << "for (int count = " << nrep << ", ";
479  if (n > 0) {
480  w << "i[" << n << "]=" << inputs << ", "
481  << "ip[" << n << "]=" << ci.increment_pattern << ", ";
482  }
483  if (np > 0) {
484  w << "wp[" << np << "]=" << ci.which_periodic << ", "
485  << "ps[" << np << "]=" << ci.period_sizes << ", "
486  << "po[" << np << "]=" << ci.period_offsets << ", "
487  << "pd[" << sp << "]=" << ci.period_data << ", ";
488  }
489  w << "o[" << m << "]=" << outputs << "; "
490  << "count > 0 ; ) {\n";
491 
492  w << " ";
493  w << "count--;\n";
494  if (np > 0) {
495  w << " ";
496  for (size_t k = 0; k < np; k++)
497  w << "ip[wp[" << k << "]] = pd[po[" << k << "] + count % ps[" << k
498  << "]]; ";
499  w << "\n";
500  }
501  if (n > 0) {
502  w << " ";
503  for (size_t k = 0; k < n; k++) w << "i[" << k << "] -= ip[" << k << "]; ";
504  w << "\n";
505  }
506  w << " ";
507  for (size_t k = 0; k < m; k++) w << "o[" << k << "] -= " << m << "; ";
508  w << "\n";
509 
510  w << " ";
511 
512  ReverseArgs<Writer> args_cpy = args;
513  args_cpy.set_indirect();
514  args_cpy.ptr.first = ci.n;
515  args_cpy.ptr.second = ci.m;
516  for (size_t k = opstack.size(); k > 0;) {
517  k--;
518  opstack[k]->reverse_decr(args_cpy);
519  }
520  w << "\n";
521 
522  w << " ";
523  w << "}";
524 }
525 
526 void StackOp::dependencies(Args<> args, Dependencies &dep) const {
527  std::vector<Index> lower;
528  std::vector<Index> upper;
529  ci.dependencies_intervals(args, lower, upper);
530  for (size_t i = 0; i < lower.size(); i++) {
531  dep.add_interval(lower[i], upper[i]);
532  }
533 }
534 
535 const char *StackOp::op_name() { return "StackOp"; }
536 
539  cfg.strong_inv = false;
540  cfg.strong_const = false;
541  cfg.strong_output = false;
542  cfg.reduce = false;
543  cfg.deterministic = false;
544  std::vector<hash_t> h = glob.hash_sweep(cfg);
545  std::vector<Index> remap = radix::first_occurance<Index>(h);
546 
547  TMBAD_ASSERT(all_allow_remap(glob));
548 
549  Args<> args(glob.inputs);
550  for (size_t i = 0; i < glob.opstack.size(); i++) {
551  Dependencies dep;
552  glob.opstack[i]->dependencies(args, dep);
553 
554  Index var = args.ptr.second;
555  toposort_remap<Index> fb(remap, var);
556  dep.apply(fb);
557  glob.opstack[i]->increment(args.ptr);
558  }
559 
560  std::vector<Index> ord = radix::order<Index>(remap);
561  std::vector<Index> v2o = glob.var2op();
562  glob.subgraph_seq = subset(v2o, ord);
563 
564  glob = glob.extract_sub();
565 }
566 
568  std::vector<Index> remap(glob.values.size(), Index(-1));
569  Args<> args(glob.inputs);
570  for (size_t i = 0; i < glob.opstack.size(); i++) {
571  Dependencies dep;
572  glob.opstack[i]->dependencies(args, dep);
573  sort_unique_inplace(dep);
574  Index var = args.ptr.second;
575  temporaries_remap<Index> fb(remap, var);
576  dep.apply(fb);
577  glob.opstack[i]->increment(args.ptr);
578  }
579 
580  for (size_t i = remap.size(); i > 0;) {
581  i--;
582  if (remap[i] == Index(-1))
583  remap[i] = i;
584  else
585  remap[i] = remap[remap[i]];
586  }
587 
588  std::vector<Index> ord = radix::order<Index>(remap);
589  std::vector<Index> v2o = glob.var2op();
590  glob.subgraph_seq = subset(v2o, ord);
591 
592  glob = glob.extract_sub();
593 }
594 
596  std::vector<bool> visited(glob.opstack.size(), false);
597  std::vector<Index> v2o = glob.var2op();
598  std::vector<Index> stack;
599  std::vector<Index> result;
600  Args<> args(glob.inputs);
601  glob.subgraph_cache_ptr();
602  for (size_t k = 0; k < glob.dep_index.size(); k++) {
603  Index dep_var = glob.dep_index[k];
604  Index i = v2o[dep_var];
605 
606  stack.push_back(i);
607  visited[i] = true;
608  while (stack.size() > 0) {
609  Index i = stack.back();
610  args.ptr = glob.subgraph_ptr[i];
611  Dependencies dep;
612  glob.opstack[i]->dependencies(args, dep);
613  dfs_add_to_stack<Index> add_to_stack(stack, visited, v2o);
614  size_t before = stack.size();
615  dep.apply(add_to_stack);
616  size_t after = stack.size();
617  if (before == after) {
618  result.push_back(i);
619  stack.pop_back();
620  }
621  }
622  }
623 
624  glob.subgraph_seq = result;
625  glob = glob.extract_sub();
626 
627  glob.shrink_to_fit();
628 }
629 
630 void compress(global &glob, size_t max_period_size) {
631  size_t min_period_rep = TMBAD_MIN_PERIOD_REP;
632  periodic<global::OperatorPure *> p(glob.opstack, max_period_size,
633  min_period_rep);
634  std::vector<period> periods = p.find_all();
635 
636  std::vector<period> periods_expand;
637  for (size_t i = 0; i < periods.size(); i++) {
638  std::vector<period> tmp = split_period(&glob, periods[i], max_period_size);
639 
640  if (tmp.size() > 10) {
641  tmp.resize(0);
642  tmp.push_back(periods[i]);
643  }
644 
645  for (size_t j = 0; j < tmp.size(); j++) {
646  if (tmp[j].rep > 1) periods_expand.push_back(tmp[j]);
647  }
648  }
649 
650  std::swap(periods, periods_expand);
651  OperatorPure *null_op = get_glob()->getOperator<global::NullOp>();
652  IndexPair ptr(0, 0);
653  Index k = 0;
654  for (size_t i = 0; i < periods.size(); i++) {
655  period p = periods[i];
656  TMBAD_ASSERT(p.rep >= 1);
657  while (k < p.begin) {
658  glob.opstack[k]->increment(ptr);
659  k++;
660  }
661 
662  OperatorPure *pOp =
663  get_glob()->getOperator<StackOp>(&glob, p, ptr, max_period_size);
664  Index ninp = 0;
665  for (size_t j = 0; j < p.size * p.rep; j++) {
666  ninp += glob.opstack[p.begin + j]->input_size();
667  glob.opstack[p.begin + j]->deallocate();
668  glob.opstack[p.begin + j] = null_op;
669  }
670  glob.opstack[p.begin] = pOp;
671  ninp -= pOp->input_size();
672  glob.opstack[p.begin + 1] =
673  get_glob()->getOperator<global::NullOp2>(ninp, 0);
674  }
675 
676  std::vector<bool> marks(glob.values.size(), true);
677  glob.extract_sub_inplace(marks);
678  glob.shrink_to_fit();
679 }
680 } // namespace TMBad
681 // Autogenerated - do not edit by hand !
682 #include "global.hpp"
683 namespace TMBad {
684 
685 global *global_ptr_data[TMBAD_MAX_NUM_THREADS] = {NULL};
686 global **global_ptr = global_ptr_data;
687 std::ostream *Writer::cout = 0;
688 bool global::fuse = 0;
689 
690 global *get_glob() { return global_ptr[TMBAD_THREAD_NUM]; }
691 
692 Dependencies::Dependencies() {}
693 
694 void Dependencies::clear() {
695  this->resize(0);
696  I.resize(0);
697 }
698 
699 void Dependencies::add_interval(Index a, Index b) {
700  I.push_back(std::pair<Index, Index>(a, b));
701 }
702 
703 void Dependencies::add_segment(Index start, Index size) {
704  if (size > 0) add_interval(start, start + size - 1);
705 }
706 
707 void Dependencies::monotone_transform_inplace(const std::vector<Index> &x) {
708  for (size_t i = 0; i < this->size(); i++) (*this)[i] = x[(*this)[i]];
709  for (size_t i = 0; i < I.size(); i++) {
710  I[i].first = x[I[i].first];
711  I[i].second = x[I[i].second];
712  }
713 }
714 
715 bool Dependencies::any(const std::vector<bool> &x) const {
716  for (size_t i = 0; i < this->size(); i++)
717  if (x[(*this)[i]]) return true;
718  for (size_t i = 0; i < I.size(); i++) {
719  for (Index j = I[i].first; j <= I[i].second; j++) {
720  if (x[j]) return true;
721  }
722  }
723  return false;
724 }
725 
726 std::string tostr(const Index &x) {
727  std::ostringstream strs;
728  strs << x;
729  return strs.str();
730 }
731 
732 std::string tostr(const Scalar &x) {
733  std::ostringstream strs;
734  strs << x;
735  return strs.str();
736 }
737 
738 Writer::Writer(std::string str) : std::string(str) {}
739 
740 Writer::Writer(Scalar x) : std::string(tostr(x)) {}
741 
742 Writer::Writer() {}
743 
744 std::string Writer::p(std::string x) { return "(" + x + ")"; }
745 
746 Writer Writer::operator+(const Writer &other) {
747  return p(*this + " + " + other);
748 }
749 
750 Writer Writer::operator-(const Writer &other) {
751  return p(*this + " - " + other);
752 }
753 
754 Writer Writer::operator-() { return " - " + *this; }
755 
756 Writer Writer::operator*(const Writer &other) { return *this + " * " + other; }
757 
758 Writer Writer::operator/(const Writer &other) { return *this + " / " + other; }
759 
760 Writer Writer::operator*(const Scalar &other) {
761  return *this + "*" + tostr(other);
762 }
763 
764 Writer Writer::operator+(const Scalar &other) {
765  return p(*this + "+" + tostr(other));
766 }
767 
768 void Writer::operator=(const Writer &other) {
769  *cout << *this + " = " + other << ";";
770 }
771 
772 void Writer::operator+=(const Writer &other) {
773  *cout << *this + " += " + other << ";";
774 }
775 
776 void Writer::operator-=(const Writer &other) {
777  *cout << *this + " -= " + other << ";";
778 }
779 
780 void Writer::operator*=(const Writer &other) {
781  *cout << *this + " *= " + other << ";";
782 }
783 
784 void Writer::operator/=(const Writer &other) {
785  *cout << *this + " /= " + other << ";";
786 }
787 
788 Position::Position(Index node, Index first, Index second)
789  : node(node), ptr(first, second) {}
790 
791 Position::Position() : node(0), ptr(0, 0) {}
792 
793 bool Position::operator<(const Position &other) const {
794  return this->node < other.node;
795 }
796 
797 graph::graph() {}
798 
799 size_t graph::num_neighbors(Index node) { return p[node + 1] - p[node]; }
800 
801 Index *graph::neighbors(Index node) { return &(j[p[node]]); }
802 
803 bool graph::empty() { return p.size() == 0; }
804 
805 size_t graph::num_nodes() { return (empty() ? 0 : p.size() - 1); }
806 
807 void graph::print() {
808  for (size_t node = 0; node < num_nodes(); node++) {
809  Rcout << node << ": ";
810  for (size_t i = 0; i < num_neighbors(node); i++) {
811  Rcout << " " << neighbors(node)[i];
812  }
813  Rcout << "\n";
814  }
815 }
816 
817 std::vector<Index> graph::rowcounts() {
818  std::vector<Index> ans(num_nodes());
819  for (size_t i = 0; i < ans.size(); i++) ans[i] = num_neighbors(i);
820  return ans;
821 }
822 
823 std::vector<Index> graph::colcounts() {
824  std::vector<Index> ans(num_nodes());
825  for (size_t i = 0; i < j.size(); i++) ans[j[i]]++;
826  return ans;
827 }
828 
829 void graph::bfs(const std::vector<Index> &start, std::vector<bool> &visited,
830  std::vector<Index> &result) {
831  for (size_t i = 0; i < start.size(); i++) {
832  Index node = start[i];
833  for (size_t j_ = 0; j_ < num_neighbors(node); j_++) {
834  Index k = neighbors(node)[j_];
835  if (!visited[k]) {
836  result.push_back(k);
837  visited[k] = true;
838  }
839  }
840  }
841 }
842 
843 void graph::search(std::vector<Index> &start, bool sort_input,
844  bool sort_output) {
845  if (mark.size() == 0) mark.resize(num_nodes(), false);
846 
847  search(start, mark, sort_input, sort_output);
848 
849  for (size_t i = 0; i < start.size(); i++) mark[start[i]] = false;
850 }
851 
852 void graph::search(std::vector<Index> &start, std::vector<bool> &visited,
853  bool sort_input, bool sort_output) {
854  if (sort_input) sort_unique_inplace(start);
855 
856  for (size_t i = 0; i < start.size(); i++) visited[start[i]] = true;
857 
858  bfs(start, visited, start);
859 
860  if (sort_output) sort_inplace(start);
861 }
862 
863 std::vector<Index> graph::boundary(const std::vector<Index> &subgraph) {
864  if (mark.size() == 0) mark.resize(num_nodes(), false);
865 
866  std::vector<Index> boundary;
867 
868  for (size_t i = 0; i < subgraph.size(); i++) mark[subgraph[i]] = true;
869 
870  bfs(subgraph, mark, boundary);
871 
872  for (size_t i = 0; i < subgraph.size(); i++) mark[subgraph[i]] = false;
873  for (size_t i = 0; i < boundary.size(); i++) mark[boundary[i]] = false;
874 
875  return boundary;
876 }
877 
878 graph::graph(size_t num_nodes, const std::vector<IndexPair> &edges) {
879  std::vector<IndexPair>::const_iterator it;
880  std::vector<Index> row_counts(num_nodes, 0);
881  for (it = edges.begin(); it != edges.end(); it++) {
882  row_counts[it->first]++;
883  }
884 
885  p.resize(num_nodes + 1);
886  p[0] = 0;
887  for (size_t i = 0; i < num_nodes; i++) {
888  p[i + 1] = p[i] + row_counts[i];
889  }
890 
891  std::vector<Index> k(p);
892  j.resize(edges.size());
893  for (it = edges.begin(); it != edges.end(); it++) {
894  j[k[it->first]++] = it->second;
895  }
896 }
897 
898 op_info::op_info() : code(0) {
899  static_assert(sizeof(IntRep) * 8 >= op_flag_count,
900  "'IntRep' not wide enough!");
901 }
902 
903 op_info::op_info(op_flag f) : code(1 << f) {}
904 
905 bool op_info::test(op_flag f) const { return code & 1 << f; }
906 
907 op_info &op_info::operator|=(const op_info &other) {
908  code |= other.code;
909  return *this;
910 }
911 
912 op_info &op_info::operator&=(const op_info &other) {
913  code &= other.code;
914  return *this;
915 }
916 
917 global::operation_stack::operation_stack() {}
918 
919 global::operation_stack::operation_stack(const operation_stack &other) {
920  (*this).copy_from(other);
921 }
922 
923 void global::operation_stack::push_back(OperatorPure *x) {
924  Base::push_back(x);
925 
926  any |= x->info();
927 }
928 
929 operation_stack &global::operation_stack::operator=(
930  const operation_stack &other) {
931  if (this != &other) {
932  (*this).clear();
933  (*this).copy_from(other);
934  }
935  return *this;
936 }
937 
938 global::operation_stack::~operation_stack() { (*this).clear(); }
939 
940 void global::operation_stack::clear() {
941  if (any.test(op_info::dynamic)) {
942  for (size_t i = 0; i < (*this).size(); i++) (*this)[i]->deallocate();
943  }
944  (*this).resize(0);
945 }
946 
947 void global::operation_stack::copy_from(const operation_stack &other) {
948  if (other.any.test(op_info::dynamic)) {
949  for (size_t i = 0; i < other.size(); i++) Base::push_back(other[i]->copy());
950  } else {
951  Base::operator=(other);
952  }
953  this->any = other.any;
954 }
955 
956 global::global()
957  : forward_compiled(NULL),
958  reverse_compiled(NULL),
959  parent_glob(NULL),
960  in_use(false) {}
961 
962 void global::clear() {
963  values.resize(0);
964  derivs.resize(0);
965  inputs.resize(0);
966  inv_index.resize(0);
967  dep_index.resize(0);
968  subgraph_ptr.resize(0);
969  subgraph_seq.resize(0);
970  opstack.clear();
971 }
972 
973 void global::shrink_to_fit(double tol) {
974  std::vector<Scalar>().swap(derivs);
975  std::vector<IndexPair>().swap(subgraph_ptr);
976  if (values.size() < tol * values.capacity())
977  std::vector<Scalar>(values).swap(values);
978  if (inputs.size() < tol * inputs.capacity())
979  std::vector<Index>(inputs).swap(inputs);
980  if (opstack.size() < tol * opstack.capacity())
981  std::vector<OperatorPure *>(opstack).swap(opstack);
982 }
983 
984 void global::clear_deriv(Position start) {
985  derivs.resize(values.size());
986  std::fill(derivs.begin() + start.ptr.second, derivs.end(), 0);
987 }
988 
989 Scalar &global::value_inv(Index i) { return values[inv_index[i]]; }
990 
991 Scalar &global::deriv_inv(Index i) { return derivs[inv_index[i]]; }
992 
993 Scalar &global::value_dep(Index i) { return values[dep_index[i]]; }
994 
995 Scalar &global::deriv_dep(Index i) { return derivs[dep_index[i]]; }
996 
997 Position global::begin() { return Position(0, 0, 0); }
998 
999 Position global::end() {
1000  return Position(opstack.size(), inputs.size(), values.size());
1001 }
1002 
1003 CONSTEXPR bool global::no_filter::operator[](size_t i) const { return true; }
1004 
1005 void global::forward(Position start) {
1006  if (forward_compiled != NULL) {
1007  forward_compiled(values.data());
1008  return;
1009  }
1010  ForwardArgs<Scalar> args(inputs, values, this);
1011  args.ptr = start.ptr;
1012  forward_loop(args, start.node);
1013 }
1014 
1015 void global::reverse(Position start) {
1016  if (reverse_compiled != NULL) {
1017  reverse_compiled(values.data(), derivs.data());
1018  return;
1019  }
1020  ReverseArgs<Scalar> args(inputs, values, derivs, this);
1021  reverse_loop(args, start.node);
1022 }
1023 
1024 void global::forward_sub() {
1025  ForwardArgs<Scalar> args(inputs, values, this);
1026  forward_loop_subgraph(args);
1027 }
1028 
1029 void global::reverse_sub() {
1030  ReverseArgs<Scalar> args(inputs, values, derivs, this);
1031  reverse_loop_subgraph(args);
1032 }
1033 
1034 void global::forward(std::vector<bool> &marks) {
1035  intervals<Index> marked_intervals;
1036  ForwardArgs<bool> args(inputs, marks, marked_intervals);
1037  forward_loop(args);
1038 }
1039 
1040 void global::reverse(std::vector<bool> &marks) {
1041  intervals<Index> marked_intervals;
1042  ReverseArgs<bool> args(inputs, marks, marked_intervals);
1043  reverse_loop(args);
1044 }
1045 
1046 void global::forward_sub(std::vector<bool> &marks,
1047  const std::vector<bool> &node_filter) {
1048  intervals<Index> marked_intervals;
1049  ForwardArgs<bool> args(inputs, marks, marked_intervals);
1050  if (node_filter.size() == 0)
1051  forward_loop_subgraph(args);
1052  else
1053  forward_loop(args, 0, node_filter);
1054 }
1055 
1056 void global::reverse_sub(std::vector<bool> &marks,
1057  const std::vector<bool> &node_filter) {
1058  intervals<Index> marked_intervals;
1059  ReverseArgs<bool> args(inputs, marks, marked_intervals);
1060  if (node_filter.size() == 0)
1061  reverse_loop_subgraph(args);
1062  else
1063  reverse_loop(args, 0, node_filter);
1064 }
1065 
1066 void global::forward_dense(std::vector<bool> &marks) {
1067  intervals<Index> marked_intervals;
1068  ForwardArgs<bool> args(inputs, marks, marked_intervals);
1069  for (size_t i = 0; i < opstack.size(); i++) {
1070  opstack[i]->forward_incr_mark_dense(args);
1071  }
1072 }
1073 
1074 intervals<Index> global::updating_intervals() const {
1075  Dependencies dep;
1076  intervals<Index> marked_intervals;
1077  Args<> args(inputs);
1078  for (size_t i = 0; i < opstack.size(); i++) {
1079  if (opstack[i]->info().test(op_info::updating)) {
1080  dep.clear();
1081  opstack[i]->dependencies(args, dep);
1082 
1083  for (size_t i = 0; i < dep.I.size(); i++) {
1084  Index a = dep.I[i].first;
1085  Index b = dep.I[i].second;
1086  marked_intervals.insert(a, b);
1087  }
1088  }
1089  opstack[i]->increment(args.ptr);
1090  }
1091  return marked_intervals;
1092 }
1093 
1094 intervals<Index> global::updating_intervals_sub() const {
1095  Dependencies dep;
1096  intervals<Index> marked_intervals;
1097  Args<> args(inputs);
1098  subgraph_cache_ptr();
1099  for (size_t j = 0; j < subgraph_seq.size(); j++) {
1100  Index i = subgraph_seq[j];
1101  args.ptr = subgraph_ptr[i];
1102  if (opstack[i]->info().test(op_info::updating)) {
1103  dep.clear();
1104  opstack[i]->dependencies(args, dep);
1105 
1106  for (size_t i = 0; i < dep.I.size(); i++) {
1107  Index a = dep.I[i].first;
1108  Index b = dep.I[i].second;
1109  marked_intervals.insert(a, b);
1110  }
1111  }
1112  }
1113  return marked_intervals;
1114 }
1115 
1116 Replay &global::replay::value_inv(Index i) { return values[orig.inv_index[i]]; }
1117 
1118 Replay &global::replay::deriv_inv(Index i) { return derivs[orig.inv_index[i]]; }
1119 
1120 Replay &global::replay::value_dep(Index i) { return values[orig.dep_index[i]]; }
1121 
1122 Replay &global::replay::deriv_dep(Index i) { return derivs[orig.dep_index[i]]; }
1123 
1124 global::replay::replay(const global &orig, global &target)
1125  : orig(orig), target(target) {
1126  TMBAD_ASSERT(&orig != &target);
1127 }
1128 
1129 void global::replay::start() {
1130  parent_glob = get_glob();
1131  if (&target != parent_glob) target.ad_start();
1132  values = std::vector<Replay>(orig.values.begin(), orig.values.end());
1133 }
1134 
1135 void global::replay::stop() {
1136  if (&target != parent_glob) target.ad_stop();
1137  TMBAD_ASSERT(parent_glob == get_glob());
1138 }
1139 
1140 void global::replay::add_updatable_derivs(const intervals<Index> &I) {
1141  struct {
1142  Replay *p;
1143  void operator()(Index a, Index b) {
1144  Index n = b - a + 1;
1145  global::ZeroOp Z(n);
1146  Z(p + a, n);
1147  }
1148  } F = {derivs.data()};
1149  I.apply(F);
1150 }
1151 
1152 void global::replay::clear_deriv() {
1153  derivs.resize(values.size());
1154  std::fill(derivs.begin(), derivs.end(), Replay(0));
1155 
1156  if (orig.opstack.any.test(op_info::updating)) {
1157  intervals<Index> I = orig.updating_intervals();
1158  add_updatable_derivs(I);
1159  }
1160 }
1161 
1162 void global::replay::forward(bool inv_tags, bool dep_tags, Position start,
1163  const std::vector<bool> &node_filter) {
1164  TMBAD_ASSERT(&target == get_glob());
1165  if (inv_tags) {
1166  for (size_t i = 0; i < orig.inv_index.size(); i++)
1167  value_inv(i).Independent();
1168  }
1169  ForwardArgs<Replay> args(orig.inputs, values);
1170  if (node_filter.size() > 0) {
1171  TMBAD_ASSERT(node_filter.size() == orig.opstack.size());
1172  orig.forward_loop(args, start.node, node_filter);
1173  } else {
1174  orig.forward_loop(args, start.node);
1175  }
1176  if (dep_tags) {
1177  for (size_t i = 0; i < orig.dep_index.size(); i++) value_dep(i).Dependent();
1178  }
1179 }
1180 
1181 void global::replay::reverse(bool dep_tags, bool inv_tags, Position start,
1182  const std::vector<bool> &node_filter) {
1183  TMBAD_ASSERT(&target == get_glob());
1184  if (inv_tags) {
1185  for (size_t i = 0; i < orig.dep_index.size(); i++)
1186  deriv_dep(i).Independent();
1187  }
1188  ReverseArgs<Replay> args(orig.inputs, values, derivs);
1189  if (node_filter.size() > 0) {
1190  TMBAD_ASSERT(node_filter.size() == orig.opstack.size());
1191  orig.reverse_loop(args, start.node, node_filter);
1192  } else {
1193  orig.reverse_loop(args, start.node);
1194  }
1195 
1196  std::fill(derivs.begin(), derivs.begin() + start.ptr.second, Replay(0));
1197  if (dep_tags) {
1198  for (size_t i = 0; i < orig.inv_index.size(); i++) deriv_inv(i).Dependent();
1199  }
1200 }
1201 
1202 void global::replay::forward_sub() {
1203  ForwardArgs<Replay> args(orig.inputs, values);
1204  orig.forward_loop_subgraph(args);
1205 }
1206 
1207 void global::replay::reverse_sub() {
1208  ReverseArgs<Replay> args(orig.inputs, values, derivs);
1209  orig.reverse_loop_subgraph(args);
1210 }
1211 
1212 void global::replay::clear_deriv_sub() {
1213  orig.clear_array_subgraph(derivs);
1214 
1215  if (orig.opstack.any.test(op_info::updating)) {
1216  intervals<Index> I = orig.updating_intervals_sub();
1217  add_updatable_derivs(I);
1218  }
1219 }
1220 
1221 void global::forward_replay(bool inv_tags, bool dep_tags) {
1222  global new_glob;
1223  global::replay replay(*this, new_glob);
1224  replay.start();
1225  replay.forward(inv_tags, dep_tags);
1226  replay.stop();
1227  *this = new_glob;
1228 }
1229 
1230 void global::subgraph_cache_ptr() const {
1231  if (subgraph_ptr.size() == opstack.size()) return;
1232  TMBAD_ASSERT(subgraph_ptr.size() < opstack.size());
1233  if (subgraph_ptr.size() == 0) subgraph_ptr.push_back(IndexPair(0, 0));
1234  for (size_t i = subgraph_ptr.size(); i < opstack.size(); i++) {
1235  IndexPair ptr = subgraph_ptr[i - 1];
1236  opstack[i - 1]->increment(ptr);
1237  subgraph_ptr.push_back(ptr);
1238  }
1239 }
1240 
1241 void global::set_subgraph(const std::vector<bool> &marks, bool append) {
1242  std::vector<Index> v2o = var2op();
1243  if (!append) subgraph_seq.resize(0);
1244  Index previous = (Index)-1;
1245  for (size_t i = 0; i < marks.size(); i++) {
1246  if (marks[i] && (v2o[i] != previous)) {
1247  subgraph_seq.push_back(v2o[i]);
1248  previous = v2o[i];
1249  }
1250  }
1251 }
1252 
1253 void global::mark_subgraph(std::vector<bool> &marks) {
1254  TMBAD_ASSERT(marks.size() == values.size());
1255  clear_array_subgraph(marks, true);
1256 }
1257 
1258 void global::unmark_subgraph(std::vector<bool> &marks) {
1259  TMBAD_ASSERT(marks.size() == values.size());
1260  clear_array_subgraph(marks, false);
1261 }
1262 
1263 void global::subgraph_trivial() {
1264  subgraph_cache_ptr();
1265  subgraph_seq.resize(0);
1266  for (size_t i = 0; i < opstack.size(); i++) subgraph_seq.push_back(i);
1267 }
1268 
1269 void global::clear_deriv_sub() { clear_array_subgraph(derivs); }
1270 
1271 global global::extract_sub(std::vector<Index> &var_remap, global new_glob) {
1272  subgraph_cache_ptr();
1273  TMBAD_ASSERT(var_remap.size() == 0 || var_remap.size() == values.size());
1274  var_remap.resize(values.size(), 0);
1275  std::vector<bool> independent_variable = inv_marks();
1276  std::vector<bool> dependent_variable = dep_marks();
1277  ForwardArgs<Scalar> args(inputs, values, this);
1278  for (size_t j = 0; j < subgraph_seq.size(); j++) {
1279  Index i = subgraph_seq[j];
1280  args.ptr = subgraph_ptr[i];
1281 
1282  size_t nout = opstack[i]->output_size();
1283  for (size_t k = 0; k < nout; k++) {
1284  Index new_index = new_glob.values.size();
1285  Index old_index = args.output(k);
1286  var_remap[old_index] = new_index;
1287  new_glob.values.push_back(args.y(k));
1288  if (independent_variable[old_index]) {
1289  independent_variable[old_index] = false;
1290  }
1291  if (dependent_variable[old_index]) {
1292  dependent_variable[old_index] = false;
1293  }
1294  }
1295 
1296  size_t nin = opstack[i]->input_size();
1297  for (size_t k = 0; k < nin; k++) {
1298  new_glob.inputs.push_back(var_remap[args.input(k)]);
1299  }
1300 
1301  new_glob.opstack.push_back(opstack[i]->copy());
1302  }
1303 
1304  independent_variable.flip();
1305  dependent_variable.flip();
1306 
1307  for (size_t i = 0; i < inv_index.size(); i++) {
1308  Index old_var = inv_index[i];
1309  if (independent_variable[old_var])
1310  new_glob.inv_index.push_back(var_remap[old_var]);
1311  }
1312  for (size_t i = 0; i < dep_index.size(); i++) {
1313  Index old_var = dep_index[i];
1314  if (dependent_variable[old_var])
1315  new_glob.dep_index.push_back(var_remap[old_var]);
1316  }
1317  return new_glob;
1318 }
1319 
1320 void global::extract_sub_inplace(std::vector<bool> marks) {
1321  TMBAD_ASSERT(marks.size() == values.size());
1322  std::vector<Index> var_remap(values.size(), 0);
1323  std::vector<bool> independent_variable = inv_marks();
1324  std::vector<bool> dependent_variable = dep_marks();
1325  intervals<Index> marked_intervals;
1326  ForwardArgs<bool> args(inputs, marks, marked_intervals);
1327  size_t s = 0, s_input = 0;
1328  std::vector<bool> opstack_deallocate(opstack.size(), false);
1329 
1330  for (size_t i = 0; i < opstack.size(); i++) {
1331  op_info info = opstack[i]->info();
1332 
1333  size_t nout = opstack[i]->output_size();
1334  bool any_marked_output = info.test(op_info::elimination_protected);
1335  for (size_t j = 0; j < nout; j++) {
1336  any_marked_output |= args.y(j);
1337  }
1338  if (info.test(op_info::updating) && nout == 0) {
1339  Dependencies dep;
1340  opstack[i]->dependencies_updating(args, dep);
1341  any_marked_output |= dep.any(args.values);
1342  }
1343 
1344  if (any_marked_output) {
1345  for (size_t k = 0; k < nout; k++) {
1346  Index new_index = s;
1347  Index old_index = args.output(k);
1348  var_remap[old_index] = new_index;
1349  values[new_index] = values[old_index];
1350  if (independent_variable[old_index]) {
1351  independent_variable[old_index] = false;
1352  }
1353  if (dependent_variable[old_index]) {
1354  dependent_variable[old_index] = false;
1355  }
1356  s++;
1357  }
1358 
1359  size_t nin = opstack[i]->input_size();
1360  for (size_t k = 0; k < nin; k++) {
1361  inputs[s_input] = var_remap[args.input(k)];
1362  s_input++;
1363  }
1364  }
1365  opstack[i]->increment(args.ptr);
1366  if (!any_marked_output) {
1367  opstack_deallocate[i] = true;
1368  }
1369  }
1370 
1371  independent_variable.flip();
1372  dependent_variable.flip();
1373  std::vector<Index> new_inv_index;
1374  for (size_t i = 0; i < inv_index.size(); i++) {
1375  Index old_var = inv_index[i];
1376  if (independent_variable[old_var])
1377  new_inv_index.push_back(var_remap[old_var]);
1378  }
1379  inv_index = new_inv_index;
1380  std::vector<Index> new_dep_index;
1381  for (size_t i = 0; i < dep_index.size(); i++) {
1382  Index old_var = dep_index[i];
1383  if (dependent_variable[old_var])
1384  new_dep_index.push_back(var_remap[old_var]);
1385  }
1386  dep_index = new_dep_index;
1387 
1388  inputs.resize(s_input);
1389  values.resize(s);
1390  size_t k = 0;
1391  for (size_t i = 0; i < opstack.size(); i++) {
1392  if (opstack_deallocate[i]) {
1393  opstack[i]->deallocate();
1394  } else {
1395  opstack[k] = opstack[i];
1396  k++;
1397  }
1398  }
1399  opstack.resize(k);
1400 
1401  if (opstack.any.test(op_info::dynamic)) this->forward();
1402 }
1403 
1404 global global::extract_sub() {
1405  std::vector<Index> var_remap;
1406  return extract_sub(var_remap);
1407 }
1408 
1409 std::vector<Index> global::var2op() {
1410  std::vector<Index> var2op(values.size());
1411  Args<> args(inputs);
1412  size_t j = 0;
1413  for (size_t i = 0; i < opstack.size(); i++) {
1414  opstack[i]->increment(args.ptr);
1415  for (; j < (size_t)args.ptr.second; j++) {
1416  var2op[j] = i;
1417  }
1418  }
1419  return var2op;
1420 }
1421 
1422 std::vector<bool> global::var2op(const std::vector<bool> &values) {
1423  std::vector<bool> ans(opstack.size(), false);
1424  Args<> args(inputs);
1425  size_t j = 0;
1426  for (size_t i = 0; i < opstack.size(); i++) {
1427  opstack[i]->increment(args.ptr);
1428  for (; j < (size_t)args.ptr.second; j++) {
1429  ans[i] = ans[i] || values[j];
1430  }
1431  }
1432  return ans;
1433 }
1434 
1435 std::vector<Index> global::op2var(const std::vector<Index> &seq) {
1436  std::vector<bool> seq_mark = mark_space(opstack.size(), seq);
1437  std::vector<Index> ans;
1438  Args<> args(inputs);
1439  size_t j = 0;
1440  for (size_t i = 0; i < opstack.size(); i++) {
1441  opstack[i]->increment(args.ptr);
1442  for (; j < (size_t)args.ptr.second; j++) {
1443  if (seq_mark[i]) ans.push_back(j);
1444  }
1445  }
1446  return ans;
1447 }
1448 
1449 std::vector<bool> global::op2var(const std::vector<bool> &seq_mark) {
1450  std::vector<bool> ans(values.size());
1451  Args<> args(inputs);
1452  size_t j = 0;
1453  for (size_t i = 0; i < opstack.size(); i++) {
1454  opstack[i]->increment(args.ptr);
1455  for (; j < (size_t)args.ptr.second; j++) {
1456  if (seq_mark[i]) ans[j] = true;
1457  }
1458  }
1459  return ans;
1460 }
1461 
1462 std::vector<Index> global::op2idx(const std::vector<Index> &var_subset,
1463  Index NA) {
1464  std::vector<Index> v2o = var2op();
1465  std::vector<Index> op2idx(opstack.size(), NA);
1466  for (size_t i = var_subset.size(); i > 0;) {
1467  i--;
1468  op2idx[v2o[var_subset[i]]] = i;
1469  }
1470  return op2idx;
1471 }
1472 
1473 std::vector<bool> global::mark_space(size_t n, const std::vector<Index> ind) {
1474  std::vector<bool> mark(n, false);
1475  for (size_t i = 0; i < ind.size(); i++) {
1476  mark[ind[i]] = true;
1477  }
1478  return mark;
1479 }
1480 
1481 std::vector<bool> global::inv_marks() {
1482  return mark_space(values.size(), inv_index);
1483 }
1484 
1485 std::vector<bool> global::dep_marks() {
1486  return mark_space(values.size(), dep_index);
1487 }
1488 
1489 std::vector<bool> global::subgraph_marks() {
1490  return mark_space(opstack.size(), subgraph_seq);
1491 }
1492 
1493 global::append_edges::append_edges(size_t &i, size_t num_nodes,
1494  const std::vector<bool> &keep_var,
1495  std::vector<Index> &var2op,
1496  std::vector<IndexPair> &edges)
1497  : i(i),
1498  keep_var(keep_var),
1499  var2op(var2op),
1500  edges(edges),
1501  op_marks(num_nodes, false),
1502  pos(0) {}
1503 
1504 void global::append_edges::operator()(Index dep_j) {
1505  if (keep_var[dep_j]) {
1506  size_t k = var2op[dep_j];
1507  if (i != k && !op_marks[k]) {
1508  IndexPair edge;
1509 
1510  edge.first = k;
1511  edge.second = i;
1512  edges.push_back(edge);
1513  op_marks[k] = true;
1514  }
1515  }
1516 }
1517 
1518 void global::append_edges::start_iteration() { pos = edges.size(); }
1519 
1520 void global::append_edges::end_iteration() {
1521  size_t n = edges.size() - pos;
1522  for (size_t j = 0; j < n; j++) op_marks[edges[pos + j].first] = false;
1523 }
1524 
1525 graph global::build_graph(bool transpose, const std::vector<bool> &keep_var) {
1526  TMBAD_ASSERT(keep_var.size() == values.size());
1527 
1528  std::vector<Index> var2op = this->var2op();
1529 
1530  bool any_updating = false;
1531 
1532  Args<> args(inputs);
1533  std::vector<IndexPair> edges;
1534  Dependencies dep;
1535  size_t i = 0;
1536  append_edges F(i, opstack.size(), keep_var, var2op, edges);
1537  for (; i < opstack.size(); i++) {
1538  any_updating |= opstack[i]->info().test(op_info::updating);
1539  dep.clear();
1540  opstack[i]->dependencies(args, dep);
1541  F.start_iteration();
1542  dep.apply(F);
1543  F.end_iteration();
1544  opstack[i]->increment(args.ptr);
1545  }
1546  if (any_updating) {
1547  size_t begin = edges.size();
1548  i = 0;
1549  args = Args<>(inputs);
1550  for (; i < opstack.size(); i++) {
1551  dep.clear();
1552  opstack[i]->dependencies_updating(args, dep);
1553  F.start_iteration();
1554  dep.apply(F);
1555  F.end_iteration();
1556  opstack[i]->increment(args.ptr);
1557  }
1558  for (size_t j = begin; j < edges.size(); j++)
1559  std::swap(edges[j].first, edges[j].second);
1560  }
1561 
1562  if (transpose) {
1563  for (size_t j = 0; j < edges.size(); j++)
1564  std::swap(edges[j].first, edges[j].second);
1565  }
1566 
1567  graph G(opstack.size(), edges);
1568 
1569  for (size_t i = 0; i < inv_index.size(); i++)
1570  G.inv2op.push_back(var2op[inv_index[i]]);
1571  for (size_t i = 0; i < dep_index.size(); i++)
1572  G.dep2op.push_back(var2op[dep_index[i]]);
1573  return G;
1574 }
1575 
1576 graph global::forward_graph(std::vector<bool> keep_var) {
1577  if (keep_var.size() == 0) {
1578  keep_var.resize(values.size(), true);
1579  }
1580  TMBAD_ASSERT(values.size() == keep_var.size());
1581  return build_graph(false, keep_var);
1582 }
1583 
1584 graph global::reverse_graph(std::vector<bool> keep_var) {
1585  if (keep_var.size() == 0) {
1586  keep_var.resize(values.size(), true);
1587  }
1588  TMBAD_ASSERT(values.size() == keep_var.size());
1589  return build_graph(true, keep_var);
1590 }
1591 
1592 bool global::identical(const global &other) const {
1593  if (inv_index != other.inv_index) return false;
1594  ;
1595  if (dep_index != other.dep_index) return false;
1596  ;
1597  if (opstack.size() != other.opstack.size()) return false;
1598  ;
1599  for (size_t i = 0; i < opstack.size(); i++) {
1600  if (opstack[i]->identifier() != other.opstack[i]->identifier())
1601  return false;
1602  ;
1603  }
1604  if (inputs != other.inputs) return false;
1605  ;
1606  if (values.size() != other.values.size()) return false;
1607  ;
1608  OperatorPure *constant = getOperator<ConstOp>();
1609  IndexPair ptr(0, 0);
1610  for (size_t i = 0; i < opstack.size(); i++) {
1611  if (opstack[i] == constant) {
1612  if (values[ptr.second] != other.values[ptr.second]) return false;
1613  ;
1614  }
1615  opstack[i]->increment(ptr);
1616  }
1617 
1618  return true;
1619 }
1620 
1621 hash_t global::hash() const {
1622  hash_t h = 37;
1623 
1624  hash(h, inv_index.size());
1625  ;
1626  for (size_t i = 0; i < inv_index.size(); i++) hash(h, inv_index[i]);
1627  ;
1628  ;
1629  hash(h, dep_index.size());
1630  ;
1631  for (size_t i = 0; i < dep_index.size(); i++) hash(h, dep_index[i]);
1632  ;
1633  ;
1634  hash(h, opstack.size());
1635  ;
1636  for (size_t i = 0; i < opstack.size(); i++) hash(h, opstack[i]);
1637  ;
1638  ;
1639  hash(h, inputs.size());
1640  ;
1641  for (size_t i = 0; i < inputs.size(); i++) hash(h, inputs[i]);
1642  ;
1643  ;
1644  hash(h, values.size());
1645  ;
1646  OperatorPure *constant = getOperator<ConstOp>();
1647  IndexPair ptr(0, 0);
1648  for (size_t i = 0; i < opstack.size(); i++) {
1649  if (opstack[i] == constant) {
1650  hash(h, values[ptr.second]);
1651  ;
1652  }
1653  opstack[i]->increment(ptr);
1654  }
1655 
1656  return h;
1657 }
1658 
1659 std::vector<hash_t> global::hash_sweep(hash_config cfg) const {
1660  std::vector<Index> opstack_id;
1661  if (cfg.deterministic) {
1662  std::vector<size_t> tmp(opstack.size());
1663  for (size_t i = 0; i < tmp.size(); i++)
1664  tmp[i] = (size_t)opstack[i]->identifier();
1665  opstack_id = radix::first_occurance<Index>(tmp);
1666  hash_t spread = (hash_t(1) << (sizeof(hash_t) * 4)) - 1;
1667  for (size_t i = 0; i < opstack_id.size(); i++)
1668  opstack_id[i] = (opstack_id[i] + 1) * spread;
1669  }
1670 
1671  std::vector<hash_t> hash_vec(values.size(), 37);
1672  Dependencies dep;
1673  OperatorPure *inv = getOperator<InvOp>();
1674  OperatorPure *constant = getOperator<ConstOp>();
1675 
1676  if (cfg.strong_inv) {
1677  bool have_inv_seed = (cfg.inv_seed.size() > 0);
1678  if (have_inv_seed) {
1679  TMBAD_ASSERT(cfg.inv_seed.size() == inv_index.size());
1680  }
1681  for (size_t i = 0; i < inv_index.size(); i++) {
1682  hash_vec[inv_index[i]] += (have_inv_seed ? cfg.inv_seed[i] + 1 : (i + 1));
1683  }
1684  }
1685 
1686  Args<> args(inputs);
1687  IndexPair &ptr = args.ptr;
1688  for (size_t i = 0; i < opstack.size(); i++) {
1689  if (opstack[i] == inv) {
1690  opstack[i]->increment(ptr);
1691  continue;
1692  }
1693  dep.clear();
1694 
1695  opstack[i]->dependencies(args, dep);
1696 
1697  hash_t h = 37;
1698  for (size_t j = 0; j < dep.size(); j++) {
1699  if (j == 0)
1700  h = hash_vec[dep[0]];
1701  else
1702  hash(h, hash_vec[dep[j]]);
1703  ;
1704  }
1705 
1706  if (!cfg.deterministic) {
1707  hash(h, opstack[i]->identifier());
1708  ;
1709  } else {
1710  hash(h, opstack_id[i]);
1711  ;
1712  }
1713 
1714  if (opstack[i] == constant && cfg.strong_const) {
1715  hash(h, values[ptr.second]);
1716  ;
1717 
1718  hash(h, values[ptr.second] > 0);
1719  ;
1720  }
1721 
1722  size_t noutput = opstack[i]->output_size();
1723  for (size_t j = 0; j < noutput; j++) {
1724  hash_vec[ptr.second + j] = h + j * cfg.strong_output;
1725  }
1726 
1727  opstack[i]->increment(ptr);
1728  }
1729  if (!cfg.reduce) return hash_vec;
1730  std::vector<hash_t> ans(dep_index.size());
1731  for (size_t j = 0; j < dep_index.size(); j++) {
1732  ans[j] = hash_vec[dep_index[j]];
1733  }
1734  return ans;
1735 }
1736 
1737 std::vector<hash_t> global::hash_sweep(bool weak) const {
1738  hash_config cfg;
1739  cfg.strong_inv = !weak;
1740  cfg.strong_const = true;
1741  cfg.strong_output = true;
1742  cfg.reduce = weak;
1743  cfg.deterministic = false;
1744  return hash_sweep(cfg);
1745 }
1746 
1747 void global::eliminate() {
1748  this->shrink_to_fit();
1749 
1750  std::vector<bool> marks;
1751  marks.resize(values.size(), false);
1752 
1753  for (size_t i = 0; i < inv_index.size(); i++) marks[inv_index[i]] = true;
1754  for (size_t i = 0; i < dep_index.size(); i++) marks[dep_index[i]] = true;
1755 
1756  reverse(marks);
1757 
1758  if (false) {
1759  set_subgraph(marks);
1760 
1761  *this = extract_sub();
1762  }
1763  this->extract_sub_inplace(marks);
1764  this->shrink_to_fit();
1765 }
1766 
1767 global::print_config::print_config() : prefix(""), mark("*"), depth(0) {}
1768 
1769 void global::print(print_config cfg) {
1770  using std::endl;
1771  using std::left;
1772  using std::setw;
1773  IndexPair ptr(0, 0);
1774  std::vector<bool> sgm = subgraph_marks();
1775  bool have_subgraph = (subgraph_seq.size() > 0);
1776  int v = 0;
1777  print_config cfg2 = cfg;
1778  cfg2.depth--;
1779  cfg2.prefix = cfg.prefix + "##";
1780  Rcout << cfg.prefix;
1781  Rcout << setw(7) << "OpName:" << setw(7 + have_subgraph)
1782  << "Node:" << setw(13) << "Value:" << setw(13) << "Deriv:" << setw(13)
1783  << "Index:";
1784  Rcout << " "
1785  << "Inputs:";
1786  Rcout << endl;
1787  for (size_t i = 0; i < opstack.size(); i++) {
1788  Rcout << cfg.prefix;
1789  Rcout << setw(7) << opstack[i]->op_name();
1790  if (have_subgraph) {
1791  if (sgm[i])
1792  Rcout << cfg.mark;
1793  else
1794  Rcout << " ";
1795  }
1796  Rcout << setw(7) << i;
1797  int numvar = opstack[i]->output_size();
1798  for (int j = 0; j < numvar + (numvar == 0); j++) {
1799  if (j > 0) Rcout << cfg.prefix;
1800  Rcout << setw((7 + 7) * (j > 0) + 13);
1801  if (numvar > 0)
1802  Rcout << values[v];
1803  else
1804  Rcout << "";
1805  Rcout << setw(13);
1806  if (numvar > 0) {
1807  if (derivs.size() == values.size())
1808  Rcout << derivs[v];
1809  else
1810  Rcout << "NA";
1811  } else {
1812  Rcout << "";
1813  }
1814  Rcout << setw(13);
1815  if (numvar > 0) {
1816  Rcout << v;
1817  } else {
1818  Rcout << "";
1819  }
1820  if (j == 0) {
1821  IndexPair ptr_old = ptr;
1822  opstack[i]->increment(ptr);
1823  int ninput = ptr.first - ptr_old.first;
1824  for (int k = 0; k < ninput; k++) {
1825  if (k == 0) Rcout << " ";
1826  Rcout << " " << inputs[ptr_old.first + k];
1827  }
1828  }
1829  Rcout << endl;
1830  if (numvar > 0) {
1831  v++;
1832  }
1833  }
1834  if (cfg.depth > 0) opstack[i]->print(cfg2);
1835  }
1836 }
1837 
1838 void global::print() { this->print(print_config()); }
1839 
1840 global::DynamicInputOutputOperator::DynamicInputOutputOperator(Index ninput,
1841  Index noutput)
1842  : ninput_(ninput), noutput_(noutput) {}
1843 
1844 Index global::DynamicInputOutputOperator::input_size() const {
1845  return this->ninput_;
1846 }
1847 
1848 Index global::DynamicInputOutputOperator::output_size() const {
1849  return this->noutput_;
1850 }
1851 
1852 const char *global::InvOp::op_name() { return "InvOp"; }
1853 
1854 const char *global::DepOp::op_name() { return "DepOp"; }
1855 
1856 void global::ConstOp::forward(ForwardArgs<Replay> &args) {
1857  args.y(0).addToTape();
1858 }
1859 
1860 const char *global::ConstOp::op_name() { return "ConstOp"; }
1861 
1862 void global::ConstOp::forward(ForwardArgs<Writer> &args) {
1863  if (args.const_literals) {
1864  args.y(0) = args.y_const(0);
1865  }
1866 }
1867 
1868 global::DataOp::DataOp(Index n) { Base::noutput = n; }
1869 
1870 const char *global::DataOp::op_name() { return "DataOp"; }
1871 
1872 void global::DataOp::forward(ForwardArgs<Writer> &args) { TMBAD_ASSERT(false); }
1873 
1874 global::ZeroOp::ZeroOp(Index n) { Base::noutput = n; }
1875 
1876 const char *global::ZeroOp::op_name() { return "ZeroOp"; }
1877 
1878 void global::ZeroOp::forward(ForwardArgs<Writer> &args) { TMBAD_ASSERT(false); }
1879 
1880 void global::ZeroOp::operator()(Replay *x, Index n) {
1881  Complete<ZeroOp> Z(n);
1882  ad_segment y = Z(ad_segment());
1883  for (size_t i = 0; i < n; i++) x[i] = y[i];
1884 }
1885 
1886 global::NullOp::NullOp() {}
1887 
1888 const char *global::NullOp::op_name() { return "NullOp"; }
1889 
1890 global::NullOp2::NullOp2(Index ninput, Index noutput)
1891  : global::DynamicInputOutputOperator(ninput, noutput) {}
1892 
1893 const char *global::NullOp2::op_name() { return "NullOp2"; }
1894 
1895 global::RefOp::RefOp(global *glob, Index i) : glob(glob), i(i) {}
1896 
1897 void global::RefOp::forward(ForwardArgs<Scalar> &args) {
1898  args.y(0) = glob->values[i];
1899 }
1900 
1901 void global::RefOp::forward(ForwardArgs<Replay> &args) {
1902  if (get_glob() == this->glob) {
1903  ad_plain tmp;
1904  tmp.index = i;
1905  args.y(0) = tmp;
1906  } else {
1907  global::OperatorPure *pOp =
1908  get_glob()->getOperator<RefOp>(this->glob, this->i);
1909  args.y(0) =
1910  get_glob()->add_to_stack<RefOp>(pOp, std::vector<ad_plain>(0))[0];
1911  }
1912 }
1913 
1914 void global::RefOp::reverse(ReverseArgs<Replay> &args) {
1915  if (get_glob() == this->glob) {
1916  args.dx(0) += args.dy(0);
1917  }
1918 }
1919 
1920 const char *global::RefOp::op_name() { return "RefOp"; }
1921 
1922 OperatorPure *global::Fuse(OperatorPure *Op1, OperatorPure *Op2) {
1923  if (Op1 == Op2)
1924  return Op1->self_fuse();
1925  else
1926  return Op1->other_fuse(Op2);
1927 }
1928 
1929 void global::set_fuse(bool flag) { fuse = flag; }
1930 
1931 void global::add_to_opstack(OperatorPure *pOp) {
1932  if (fuse) {
1933  while (this->opstack.size() > 0) {
1934  OperatorPure *OpTry = this->Fuse(this->opstack.back(), pOp);
1935  if (OpTry == NULL) break;
1936 
1937  this->opstack.pop_back();
1938  pOp = OpTry;
1939  }
1940  }
1941 
1942  this->opstack.push_back(pOp);
1943 }
1944 
1945 bool global::ad_plain::initialized() const { return index != NA; }
1946 
1947 bool global::ad_plain::on_some_tape() const { return initialized(); }
1948 
1949 void global::ad_plain::addToTape() const { TMBAD_ASSERT(initialized()); }
1950 
1951 global *global::ad_plain::glob() const {
1952  return (on_some_tape() ? get_glob() : NULL);
1953 }
1954 
1955 void global::ad_plain::override_by(const ad_plain &x) const {}
1956 
1957 global::ad_plain::ad_plain() : index(NA) {}
1958 
1959 global::ad_plain::ad_plain(Scalar x) {
1960  *this = get_glob()->add_to_stack<ConstOp>(x);
1961 }
1962 
1963 global::ad_plain::ad_plain(ad_aug x) {
1964  x.addToTape();
1965  *this = x.taped_value;
1966 }
1967 
1968 Replay global::ad_plain::CopyOp::eval(Replay x0) { return x0.copy(); }
1969 
1970 const char *global::ad_plain::CopyOp::op_name() { return "CopyOp"; }
1971 
1972 ad_plain global::ad_plain::copy() const {
1973  ad_plain ans = get_glob()->add_to_stack<CopyOp>(*this);
1974  return ans;
1975 }
1976 
1977 Replay global::ad_plain::ValOp::eval(Replay x0) { return x0.copy0(); }
1978 
1979 void global::ad_plain::ValOp::dependencies(Args<> &args,
1980  Dependencies &dep) const {}
1981 
1982 const char *global::ad_plain::ValOp::op_name() { return "ValOp"; }
1983 
1984 ad_plain global::ad_plain::copy0() const {
1985  ad_plain ans = get_glob()->add_to_stack<ValOp>(*this);
1986  return ans;
1987 }
1988 
1989 ad_plain global::ad_plain::operator+(const ad_plain &other) const {
1990  ad_plain ans;
1991  ans = get_glob()->add_to_stack<AddOp>(*this, other);
1992  return ans;
1993 }
1994 
1995 ad_plain global::ad_plain::operator-(const ad_plain &other) const {
1996  ad_plain ans;
1997  ans = get_glob()->add_to_stack<SubOp>(*this, other);
1998  return ans;
1999 }
2000 
2001 ad_plain global::ad_plain::operator*(const ad_plain &other) const {
2002  ad_plain ans = get_glob()->add_to_stack<MulOp>(*this, other);
2003  return ans;
2004 }
2005 
2006 ad_plain global::ad_plain::operator*(const Scalar &other) const {
2007  ad_plain ans =
2008  get_glob()->add_to_stack<MulOp_<true, false> >(*this, ad_plain(other));
2009  return ans;
2010 }
2011 
2012 ad_plain global::ad_plain::operator/(const ad_plain &other) const {
2013  ad_plain ans = get_glob()->add_to_stack<DivOp>(*this, other);
2014  return ans;
2015 }
2016 
2017 const char *global::ad_plain::NegOp::op_name() { return "NegOp"; }
2018 
2019 ad_plain global::ad_plain::operator-() const {
2020  ad_plain ans = get_glob()->add_to_stack<NegOp>(*this);
2021  return ans;
2022 }
2023 
2024 ad_plain &global::ad_plain::operator+=(const ad_plain &other) {
2025  *this = *this + other;
2026  return *this;
2027 }
2028 
2029 ad_plain &global::ad_plain::operator-=(const ad_plain &other) {
2030  *this = *this - other;
2031  return *this;
2032 }
2033 
2034 ad_plain &global::ad_plain::operator*=(const ad_plain &other) {
2035  *this = *this * other;
2036  return *this;
2037 }
2038 
2039 ad_plain &global::ad_plain::operator/=(const ad_plain &other) {
2040  *this = *this / other;
2041  return *this;
2042 }
2043 
2044 void global::ad_plain::Dependent() {
2045  *this = get_glob()->add_to_stack<DepOp>(*this);
2046  get_glob()->dep_index.push_back(this->index);
2047 }
2048 
2049 void global::ad_plain::Independent() {
2050  Scalar val = (index == NA ? NAN : this->Value());
2051  *this = get_glob()->add_to_stack<InvOp>(val);
2052  get_glob()->inv_index.push_back(this->index);
2053 }
2054 
2055 Scalar &global::ad_plain::Value() { return get_glob()->values[index]; }
2056 
2057 Scalar global::ad_plain::Value() const { return get_glob()->values[index]; }
2058 
2059 Scalar global::ad_plain::Value(global *glob) const {
2060  return glob->values[index];
2061 }
2062 
2063 Scalar &global::ad_plain::Deriv() { return get_glob()->derivs[index]; }
2064 
2065 void global::ad_start() {
2066  TMBAD_ASSERT2(!in_use, "Tape already in use");
2067  TMBAD_ASSERT(parent_glob == NULL);
2068  parent_glob = global_ptr[TMBAD_THREAD_NUM];
2069  global_ptr[TMBAD_THREAD_NUM] = this;
2070  in_use = true;
2071 }
2072 
2073 void global::ad_stop() {
2074  TMBAD_ASSERT2(in_use, "Tape not in use");
2075  global_ptr[TMBAD_THREAD_NUM] = parent_glob;
2076  parent_glob = NULL;
2077  in_use = false;
2078 }
2079 
2080 void global::Independent(std::vector<ad_plain> &x) {
2081  for (size_t i = 0; i < x.size(); i++) {
2082  x[i].Independent();
2083  }
2084 }
2085 
2086 global::ad_segment::ad_segment() : n(0), c(0) {}
2087 
2088 global::ad_segment::ad_segment(ad_plain x, size_t n) : x(x), n(n), c(1) {}
2089 
2090 global::ad_segment::ad_segment(ad_aug x) : x(ad_plain(x)), n(1), c(1) {}
2091 
2092 global::ad_segment::ad_segment(Scalar x) : x(ad_plain(x)), n(1), c(1) {}
2093 
2094 global::ad_segment::ad_segment(Index idx, size_t n) : n(n) { x.index = idx; }
2095 
2096 global::ad_segment::ad_segment(ad_plain x, size_t r, size_t c)
2097  : x(x), n(r * c), c(c) {}
2098 
2099 global::ad_segment::ad_segment(Replay *x, size_t n, bool zero_check)
2100  : n(n), c(1) {
2101  if (zero_check && all_zero(x, n)) return;
2102  if (all_constant(x, n)) {
2103  global *glob = get_glob();
2104  size_t m = glob->values.size();
2105  Complete<DataOp> D(n);
2106  D(ad_segment());
2107  for (size_t i = 0; i < n; i++) glob->values[m + i] = x[i].Value();
2108  this->x.index = m;
2109  return;
2110  }
2111  if (!is_contiguous(x, n)) {
2112  size_t before = get_glob()->values.size();
2113  this->x = x[0].copy();
2114  for (size_t i = 1; i < n; i++) x[i].copy();
2115  size_t after = get_glob()->values.size();
2116  TMBAD_ASSERT2(after - before == n,
2117  "Each invocation of copy() should construct a new variable");
2118  return;
2119  }
2120  if (n > 0) this->x = x[0];
2121 }
2122 
2123 bool global::ad_segment::identicalZero() { return !x.initialized(); }
2124 
2125 bool global::ad_segment::all_on_active_tape(Replay *x, size_t n) {
2126  global *cur_glob = get_glob();
2127  for (size_t i = 0; i < n; i++) {
2128  bool ok = x[i].on_some_tape() && (x[i].glob() == cur_glob);
2129  if (!ok) return false;
2130  }
2131  return true;
2132 }
2133 
2134 bool global::ad_segment::is_contiguous(Replay *x, size_t n) {
2135  if (!all_on_active_tape(x, n)) return false;
2136  for (size_t i = 1; i < n; i++) {
2137  if (x[i].index() != x[i - 1].index() + 1) return false;
2138  }
2139  return true;
2140 }
2141 
2142 bool global::ad_segment::all_zero(Replay *x, size_t n) {
2143  for (size_t i = 0; i < n; i++) {
2144  if (!x[i].identicalZero()) return false;
2145  }
2146  return true;
2147 }
2148 
2149 bool global::ad_segment::all_constant(Replay *x, size_t n) {
2150  for (size_t i = 0; i < n; i++) {
2151  if (!x[i].constant()) return false;
2152  }
2153  return true;
2154 }
2155 
2156 size_t global::ad_segment::size() const { return n; }
2157 
2158 size_t global::ad_segment::rows() const { return n / c; }
2159 
2160 size_t global::ad_segment::cols() const { return c; }
2161 
2162 ad_plain global::ad_segment::operator[](size_t i) const {
2163  ad_plain ans;
2164  ans.index = x.index + i;
2165  return ans;
2166 }
2167 
2168 ad_plain global::ad_segment::offset() const { return x; }
2169 
2170 Index global::ad_segment::index() const { return x.index; }
2171 
2172 bool global::ad_aug::on_some_tape() const { return taped_value.initialized(); }
2173 
2175  return on_some_tape() && (this->glob() == get_glob());
2176 }
2177 
2178 bool global::ad_aug::ontape() const { return on_some_tape(); }
2179 
2180 bool global::ad_aug::constant() const { return !taped_value.initialized(); }
2181 
2182 Index global::ad_aug::index() const { return taped_value.index; }
2183 
2184 global *global::ad_aug::glob() const {
2185  return (on_some_tape() ? data.glob : NULL);
2186 }
2187 
2188 Scalar global::ad_aug::Value() const {
2189  if (on_some_tape())
2190  return taped_value.Value(this->data.glob);
2191  else
2192  return data.value;
2193 }
2194 
2196 
2197 global::ad_aug::ad_aug(Scalar x) { data.value = x; }
2198 
2199 global::ad_aug::ad_aug(ad_plain x) : taped_value(x) { data.glob = get_glob(); }
2200 
2202  if (on_some_tape()) {
2203  if (data.glob != get_glob()) {
2204  TMBAD_ASSERT2(in_context_stack(data.glob), "Variable not initialized?");
2205  global::OperatorPure *pOp =
2206  get_glob()->getOperator<RefOp>(data.glob, taped_value.index);
2207  this->taped_value =
2208  get_glob()->add_to_stack<RefOp>(pOp, std::vector<ad_plain>(0))[0];
2209 
2210  this->data.glob = get_glob();
2211  }
2212  return;
2213  }
2214  this->taped_value = ad_plain(data.value);
2215  this->data.glob = get_glob();
2216 }
2217 
2218 void global::ad_aug::override_by(const ad_plain &x) const {
2219  this->taped_value = x;
2220  this->data.glob = get_glob();
2221 }
2222 
2224  global *cur_glob = get_glob();
2225  while (cur_glob != NULL) {
2226  if (cur_glob == glob) return true;
2227  cur_glob = cur_glob->parent_glob;
2228  }
2229  return false;
2230 }
2231 
2233  if (on_active_tape()) {
2234  return taped_value.copy();
2235  } else {
2236  ad_aug cpy = *this;
2237  cpy.addToTape();
2238  return cpy;
2239  }
2240 }
2241 
2243  ad_aug cpy = *this;
2244  if (!cpy.on_active_tape()) {
2245  cpy.addToTape();
2246  }
2247  return cpy.taped_value.copy0();
2248 }
2249 
2251  return constant() && data.value == Scalar(0);
2252 }
2253 
2255  return constant() && data.value == Scalar(1);
2256 }
2257 
2258 bool global::ad_aug::bothConstant(const ad_aug &other) const {
2259  return constant() && other.constant();
2260 }
2261 
2262 bool global::ad_aug::identical(const ad_aug &other) const {
2263  if (constant() && other.constant()) return (data.value == other.data.value);
2264 
2265  if (glob() == other.glob())
2266  return (taped_value.index == other.taped_value.index);
2267  return false;
2268 }
2269 
2271  if (bothConstant(other)) return Scalar(this->data.value + other.data.value);
2272  if (this->identicalZero()) return other;
2273  if (other.identicalZero()) return *this;
2274  return ad_plain(*this) + ad_plain(other);
2275 }
2276 
2278  if (bothConstant(other)) return Scalar(this->data.value - other.data.value);
2279  if (other.identicalZero()) return *this;
2280  if (this->identicalZero()) return -other;
2281  if (this->identical(other)) return Scalar(0);
2282  return ad_plain(*this) - ad_plain(other);
2283 }
2284 
2286  if (this->constant()) return Scalar(-(this->data.value));
2287  return -ad_plain(*this);
2288 }
2289 
2291  if (bothConstant(other)) return Scalar(this->data.value * other.data.value);
2292  if (this->identicalZero()) return *this;
2293  if (other.identicalZero()) return other;
2294  if (this->identicalOne()) return other;
2295  if (other.identicalOne()) return *this;
2296  if (this->constant()) return ad_plain(other) * Scalar(this->data.value);
2297  if (other.constant()) return ad_plain(*this) * Scalar(other.data.value);
2298  return ad_plain(*this) * ad_plain(other);
2299 }
2300 
2302  if (bothConstant(other)) return Scalar(this->data.value / other.data.value);
2303  if (this->identicalZero()) return *this;
2304  if (other.identicalOne()) return *this;
2305  return ad_plain(*this) / ad_plain(other);
2306 }
2307 
2309  *this = *this + other;
2310  return *this;
2311 }
2312 
2314  *this = *this - other;
2315  return *this;
2316 }
2317 
2319  *this = *this * other;
2320  return *this;
2321 }
2322 
2324  *this = *this / other;
2325  return *this;
2326 }
2327 
2329  this->addToTape();
2330  taped_value.Dependent();
2331 }
2332 
2334  taped_value.Independent();
2335  taped_value.Value() = this->data.value;
2336  this->data.glob = get_glob();
2337 }
2338 
2339 Scalar &global::ad_aug::Value() {
2340  if (on_some_tape())
2341 
2342  return taped_value.Value();
2343  else
2344  return data.value;
2345 }
2346 
2347 Scalar &global::ad_aug::Deriv() { return taped_value.Deriv(); }
2348 
2349 void global::Independent(std::vector<ad_aug> &x) {
2350  for (size_t i = 0; i < x.size(); i++) {
2351  x[i].Independent();
2352  }
2353 }
2354 
2355 std::ostream &operator<<(std::ostream &os, const global::ad_plain &x) {
2356  os << x.Value();
2357  return os;
2358 }
2359 
2360 std::ostream &operator<<(std::ostream &os, const global::ad_aug &x) {
2361  os << "{";
2362  if (x.on_some_tape()) {
2363  os << "value=" << x.data.glob->values[x.taped_value.index] << ", ";
2364  os << "index=" << x.taped_value.index << ", ";
2365  os << "tape=" << x.data.glob;
2366  } else {
2367  os << "const=" << x.data.value;
2368  }
2369  os << "}";
2370  return os;
2371 }
2372 
2373 ad_plain_index::ad_plain_index(const Index &i) { this->index = i; }
2374 
2375 ad_plain_index::ad_plain_index(const ad_plain &x) : ad_plain(x) {}
2376 
2377 ad_aug_index::ad_aug_index(const Index &i) : ad_aug(ad_plain_index(i)) {}
2378 
2379 ad_aug_index::ad_aug_index(const ad_aug &x) : ad_aug(x) {}
2380 
2381 ad_aug_index::ad_aug_index(const ad_plain &x) : ad_aug(x) {}
2382 
2383 Scalar Value(Scalar x) { return x; }
2384 
2385 ad_aug operator+(const double &x, const ad_aug &y) { return ad_aug(x) + y; }
2386 
2387 ad_aug operator-(const double &x, const ad_aug &y) { return ad_aug(x) - y; }
2388 
2389 ad_aug operator*(const double &x, const ad_aug &y) { return ad_aug(x) * y; }
2390 
2391 ad_aug operator/(const double &x, const ad_aug &y) { return ad_aug(x) / y; }
2392 
2393 bool operator<(const double &x, const ad_adapt &y) { return x < y.Value(); }
2394 
2395 bool operator<=(const double &x, const ad_adapt &y) { return x <= y.Value(); }
2396 
2397 bool operator>(const double &x, const ad_adapt &y) { return x > y.Value(); }
2398 
2399 bool operator>=(const double &x, const ad_adapt &y) { return x >= y.Value(); }
2400 
2401 bool operator==(const double &x, const ad_adapt &y) { return x == y.Value(); }
2402 
2403 bool operator!=(const double &x, const ad_adapt &y) { return x != y.Value(); }
2404 
2405 Writer floor(const Writer &x) {
2406  return "floor"
2407  "(" +
2408  x + ")";
2409 }
2410 const char *FloorOp::op_name() { return "FloorOp"; }
2411 ad_plain floor(const ad_plain &x) {
2412  return get_glob()->add_to_stack<FloorOp>(x);
2413 }
2414 ad_aug floor(const ad_aug &x) {
2415  if (x.constant())
2416  return Scalar(floor(x.Value()));
2417  else
2418  return floor(ad_plain(x));
2419 }
2420 
2421 Writer ceil(const Writer &x) {
2422  return "ceil"
2423  "(" +
2424  x + ")";
2425 }
2426 const char *CeilOp::op_name() { return "CeilOp"; }
2427 ad_plain ceil(const ad_plain &x) { return get_glob()->add_to_stack<CeilOp>(x); }
2428 ad_aug ceil(const ad_aug &x) {
2429  if (x.constant())
2430  return Scalar(ceil(x.Value()));
2431  else
2432  return ceil(ad_plain(x));
2433 }
2434 
2435 Writer trunc(const Writer &x) {
2436  return "trunc"
2437  "(" +
2438  x + ")";
2439 }
2440 const char *TruncOp::op_name() { return "TruncOp"; }
2441 ad_plain trunc(const ad_plain &x) {
2442  return get_glob()->add_to_stack<TruncOp>(x);
2443 }
2444 ad_aug trunc(const ad_aug &x) {
2445  if (x.constant())
2446  return Scalar(trunc(x.Value()));
2447  else
2448  return trunc(ad_plain(x));
2449 }
2450 
2451 Writer round(const Writer &x) {
2452  return "round"
2453  "(" +
2454  x + ")";
2455 }
2456 const char *RoundOp::op_name() { return "RoundOp"; }
2457 ad_plain round(const ad_plain &x) {
2458  return get_glob()->add_to_stack<RoundOp>(x);
2459 }
2460 ad_aug round(const ad_aug &x) {
2461  if (x.constant())
2462  return Scalar(round(x.Value()));
2463  else
2464  return round(ad_plain(x));
2465 }
2466 
// Sign of a double: +1 for x >= 0 (zero included), -1 for x < 0, and 0 when
// neither comparison holds (NaN).
double sign(const double &x) {
  if (x >= 0) return 1;
  if (x < 0) return -1;
  return 0;
}
2468 
2469 Writer sign(const Writer &x) {
2470  return "sign"
2471  "(" +
2472  x + ")";
2473 }
2474 const char *SignOp::op_name() { return "SignOp"; }
2475 ad_plain sign(const ad_plain &x) { return get_glob()->add_to_stack<SignOp>(x); }
2476 ad_aug sign(const ad_aug &x) {
2477  if (x.constant())
2478  return Scalar(sign(x.Value()));
2479  else
2480  return sign(ad_plain(x));
2481 }
2482 
// Indicator of x >= 0 as a double: 1 when the comparison holds, else 0.
double ge0(const double &x) {
  return (x >= 0) ? 1.0 : 0.0;
}
2484 
// Indicator of x < 0 as a double: 1 when the comparison holds, else 0.
double lt0(const double &x) {
  return (x < 0) ? 1.0 : 0.0;
}
2486 
2487 Writer ge0(const Writer &x) {
2488  return "ge0"
2489  "(" +
2490  x + ")";
2491 }
2492 const char *Ge0Op::op_name() { return "Ge0Op"; }
2493 ad_plain ge0(const ad_plain &x) { return get_glob()->add_to_stack<Ge0Op>(x); }
2494 ad_aug ge0(const ad_aug &x) {
2495  if (x.constant())
2496  return Scalar(ge0(x.Value()));
2497  else
2498  return ge0(ad_plain(x));
2499 }
2500 
2501 Writer lt0(const Writer &x) {
2502  return "lt0"
2503  "(" +
2504  x + ")";
2505 }
2506 const char *Lt0Op::op_name() { return "Lt0Op"; }
2507 ad_plain lt0(const ad_plain &x) { return get_glob()->add_to_stack<Lt0Op>(x); }
2508 ad_aug lt0(const ad_aug &x) {
2509  if (x.constant())
2510  return Scalar(lt0(x.Value()));
2511  else
2512  return lt0(ad_plain(x));
2513 }
2514 
2515 Writer fabs(const Writer &x) {
2516  return "fabs"
2517  "(" +
2518  x + ")";
2519 }
2520 void AbsOp::reverse(ReverseArgs<Scalar> &args) {
2521  typedef Scalar Type;
2522  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * sign(args.x(0));
2523 }
2524 const char *AbsOp::op_name() { return "AbsOp"; }
2525 ad_plain fabs(const ad_plain &x) { return get_glob()->add_to_stack<AbsOp>(x); }
2526 ad_aug fabs(const ad_aug &x) {
2527  if (x.constant())
2528  return Scalar(fabs(x.Value()));
2529  else
2530  return fabs(ad_plain(x));
2531 }
2532 ad_adapt fabs(const ad_adapt &x) { return ad_adapt(fabs(ad_aug(x))); }
2533 
2534 Writer sin(const Writer &x) {
2535  return "sin"
2536  "(" +
2537  x + ")";
2538 }
2539 void SinOp::reverse(ReverseArgs<Scalar> &args) {
2540  typedef Scalar Type;
2541  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * cos(args.x(0));
2542 }
2543 const char *SinOp::op_name() { return "SinOp"; }
2544 ad_plain sin(const ad_plain &x) { return get_glob()->add_to_stack<SinOp>(x); }
2545 ad_aug sin(const ad_aug &x) {
2546  if (x.constant())
2547  return Scalar(sin(x.Value()));
2548  else
2549  return sin(ad_plain(x));
2550 }
2551 ad_adapt sin(const ad_adapt &x) { return ad_adapt(sin(ad_aug(x))); }
2552 
2553 Writer cos(const Writer &x) {
2554  return "cos"
2555  "(" +
2556  x + ")";
2557 }
2558 void CosOp::reverse(ReverseArgs<Scalar> &args) {
2559  typedef Scalar Type;
2560  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * -sin(args.x(0));
2561 }
2562 const char *CosOp::op_name() { return "CosOp"; }
2563 ad_plain cos(const ad_plain &x) { return get_glob()->add_to_stack<CosOp>(x); }
2564 ad_aug cos(const ad_aug &x) {
2565  if (x.constant())
2566  return Scalar(cos(x.Value()));
2567  else
2568  return cos(ad_plain(x));
2569 }
2570 ad_adapt cos(const ad_adapt &x) { return ad_adapt(cos(ad_aug(x))); }
2571 
2572 Writer exp(const Writer &x) {
2573  return "exp"
2574  "(" +
2575  x + ")";
2576 }
2577 void ExpOp::reverse(ReverseArgs<Scalar> &args) {
2578  typedef Scalar Type;
2579  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * args.y(0);
2580 }
2581 const char *ExpOp::op_name() { return "ExpOp"; }
2582 ad_plain exp(const ad_plain &x) { return get_glob()->add_to_stack<ExpOp>(x); }
2583 ad_aug exp(const ad_aug &x) {
2584  if (x.constant())
2585  return Scalar(exp(x.Value()));
2586  else
2587  return exp(ad_plain(x));
2588 }
2589 ad_adapt exp(const ad_adapt &x) { return ad_adapt(exp(ad_aug(x))); }
2590 
2591 Writer log(const Writer &x) {
2592  return "log"
2593  "(" +
2594  x + ")";
2595 }
2596 void LogOp::reverse(ReverseArgs<Scalar> &args) {
2597  typedef Scalar Type;
2598  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * Type(1.) / args.x(0);
2599 }
2600 const char *LogOp::op_name() { return "LogOp"; }
2601 ad_plain log(const ad_plain &x) { return get_glob()->add_to_stack<LogOp>(x); }
2602 ad_aug log(const ad_aug &x) {
2603  if (x.constant())
2604  return Scalar(log(x.Value()));
2605  else
2606  return log(ad_plain(x));
2607 }
2608 ad_adapt log(const ad_adapt &x) { return ad_adapt(log(ad_aug(x))); }
2609 
2610 Writer sqrt(const Writer &x) {
2611  return "sqrt"
2612  "(" +
2613  x + ")";
2614 }
2615 void SqrtOp::reverse(ReverseArgs<Scalar> &args) {
2616  typedef Scalar Type;
2617  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * Type(0.5) / args.y(0);
2618 }
2619 const char *SqrtOp::op_name() { return "SqrtOp"; }
2620 ad_plain sqrt(const ad_plain &x) { return get_glob()->add_to_stack<SqrtOp>(x); }
2621 ad_aug sqrt(const ad_aug &x) {
2622  if (x.constant())
2623  return Scalar(sqrt(x.Value()));
2624  else
2625  return sqrt(ad_plain(x));
2626 }
2627 ad_adapt sqrt(const ad_adapt &x) { return ad_adapt(sqrt(ad_aug(x))); }
2628 
2629 Writer tan(const Writer &x) {
2630  return "tan"
2631  "(" +
2632  x + ")";
2633 }
2634 void TanOp::reverse(ReverseArgs<Scalar> &args) {
2635  typedef Scalar Type;
2636  if (args.dy(0) != Type(0))
2637  args.dx(0) += args.dy(0) * Type(1.) / (cos(args.x(0)) * cos(args.x(0)));
2638 }
2639 const char *TanOp::op_name() { return "TanOp"; }
2640 ad_plain tan(const ad_plain &x) { return get_glob()->add_to_stack<TanOp>(x); }
2641 ad_aug tan(const ad_aug &x) {
2642  if (x.constant())
2643  return Scalar(tan(x.Value()));
2644  else
2645  return tan(ad_plain(x));
2646 }
2647 ad_adapt tan(const ad_adapt &x) { return ad_adapt(tan(ad_aug(x))); }
2648 
2649 Writer sinh(const Writer &x) {
2650  return "sinh"
2651  "(" +
2652  x + ")";
2653 }
2654 void SinhOp::reverse(ReverseArgs<Scalar> &args) {
2655  typedef Scalar Type;
2656  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * cosh(args.x(0));
2657 }
2658 const char *SinhOp::op_name() { return "SinhOp"; }
2659 ad_plain sinh(const ad_plain &x) { return get_glob()->add_to_stack<SinhOp>(x); }
2660 ad_aug sinh(const ad_aug &x) {
2661  if (x.constant())
2662  return Scalar(sinh(x.Value()));
2663  else
2664  return sinh(ad_plain(x));
2665 }
2666 ad_adapt sinh(const ad_adapt &x) { return ad_adapt(sinh(ad_aug(x))); }
2667 
2668 Writer cosh(const Writer &x) {
2669  return "cosh"
2670  "(" +
2671  x + ")";
2672 }
2673 void CoshOp::reverse(ReverseArgs<Scalar> &args) {
2674  typedef Scalar Type;
2675  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * sinh(args.x(0));
2676 }
2677 const char *CoshOp::op_name() { return "CoshOp"; }
2678 ad_plain cosh(const ad_plain &x) { return get_glob()->add_to_stack<CoshOp>(x); }
2679 ad_aug cosh(const ad_aug &x) {
2680  if (x.constant())
2681  return Scalar(cosh(x.Value()));
2682  else
2683  return cosh(ad_plain(x));
2684 }
2685 ad_adapt cosh(const ad_adapt &x) { return ad_adapt(cosh(ad_aug(x))); }
2686 
2687 Writer tanh(const Writer &x) {
2688  return "tanh"
2689  "(" +
2690  x + ")";
2691 }
2692 void TanhOp::reverse(ReverseArgs<Scalar> &args) {
2693  typedef Scalar Type;
2694  if (args.dy(0) != Type(0))
2695  args.dx(0) += args.dy(0) * Type(1.) / (cosh(args.x(0)) * cosh(args.x(0)));
2696 }
2697 const char *TanhOp::op_name() { return "TanhOp"; }
2698 ad_plain tanh(const ad_plain &x) { return get_glob()->add_to_stack<TanhOp>(x); }
2699 ad_aug tanh(const ad_aug &x) {
2700  if (x.constant())
2701  return Scalar(tanh(x.Value()));
2702  else
2703  return tanh(ad_plain(x));
2704 }
2705 ad_adapt tanh(const ad_adapt &x) { return ad_adapt(tanh(ad_aug(x))); }
2706 
2707 Writer expm1(const Writer &x) {
2708  return "expm1"
2709  "(" +
2710  x + ")";
2711 }
2712 void Expm1::reverse(ReverseArgs<Scalar> &args) {
2713  typedef Scalar Type;
2714  if (args.dy(0) != Type(0)) args.dx(0) += args.dy(0) * args.y(0) + Type(1.);
2715 }
2716 const char *Expm1::op_name() { return "Expm1"; }
2717 ad_plain expm1(const ad_plain &x) { return get_glob()->add_to_stack<Expm1>(x); }
2718 ad_aug expm1(const ad_aug &x) {
2719  if (x.constant())
2720  return Scalar(expm1(x.Value()));
2721  else
2722  return expm1(ad_plain(x));
2723 }
2724 ad_adapt expm1(const ad_adapt &x) { return ad_adapt(expm1(ad_aug(x))); }
2725 
2726 Writer log1p(const Writer &x) {
2727  return "log1p"
2728  "(" +
2729  x + ")";
2730 }
2731 void Log1p::reverse(ReverseArgs<Scalar> &args) {
2732  typedef Scalar Type;
2733  if (args.dy(0) != Type(0))
2734  args.dx(0) += args.dy(0) * Type(1.) / (args.x(0) + Type(1.));
2735 }
2736 const char *Log1p::op_name() { return "Log1p"; }
2737 ad_plain log1p(const ad_plain &x) { return get_glob()->add_to_stack<Log1p>(x); }
2738 ad_aug log1p(const ad_aug &x) {
2739  if (x.constant())
2740  return Scalar(log1p(x.Value()));
2741  else
2742  return log1p(ad_plain(x));
2743 }
2744 ad_adapt log1p(const ad_adapt &x) { return ad_adapt(log1p(ad_aug(x))); }
2745 
2746 Writer asin(const Writer &x) {
2747  return "asin"
2748  "(" +
2749  x + ")";
2750 }
2751 void AsinOp::reverse(ReverseArgs<Scalar> &args) {
2752  typedef Scalar Type;
2753  if (args.dy(0) != Type(0))
2754  args.dx(0) +=
2755  args.dy(0) * Type(1.) / sqrt(Type(1.) - args.x(0) * args.x(0));
2756 }
2757 const char *AsinOp::op_name() { return "AsinOp"; }
2758 ad_plain asin(const ad_plain &x) { return get_glob()->add_to_stack<AsinOp>(x); }
2759 ad_aug asin(const ad_aug &x) {
2760  if (x.constant())
2761  return Scalar(asin(x.Value()));
2762  else
2763  return asin(ad_plain(x));
2764 }
2765 ad_adapt asin(const ad_adapt &x) { return ad_adapt(asin(ad_aug(x))); }
2766 
2767 Writer acos(const Writer &x) {
2768  return "acos"
2769  "(" +
2770  x + ")";
2771 }
2772 void AcosOp::reverse(ReverseArgs<Scalar> &args) {
2773  typedef Scalar Type;
2774  if (args.dy(0) != Type(0))
2775  args.dx(0) +=
2776  args.dy(0) * Type(-1.) / sqrt(Type(1.) - args.x(0) * args.x(0));
2777 }
2778 const char *AcosOp::op_name() { return "AcosOp"; }
2779 ad_plain acos(const ad_plain &x) { return get_glob()->add_to_stack<AcosOp>(x); }
2780 ad_aug acos(const ad_aug &x) {
2781  if (x.constant())
2782  return Scalar(acos(x.Value()));
2783  else
2784  return acos(ad_plain(x));
2785 }
2786 ad_adapt acos(const ad_adapt &x) { return ad_adapt(acos(ad_aug(x))); }
2787 
2788 Writer atan(const Writer &x) {
2789  return "atan"
2790  "(" +
2791  x + ")";
2792 }
2793 void AtanOp::reverse(ReverseArgs<Scalar> &args) {
2794  typedef Scalar Type;
2795  if (args.dy(0) != Type(0))
2796  args.dx(0) += args.dy(0) * Type(1.) / (Type(1.) + args.x(0) * args.x(0));
2797 }
2798 const char *AtanOp::op_name() { return "AtanOp"; }
2799 ad_plain atan(const ad_plain &x) { return get_glob()->add_to_stack<AtanOp>(x); }
2800 ad_aug atan(const ad_aug &x) {
2801  if (x.constant())
2802  return Scalar(atan(x.Value()));
2803  else
2804  return atan(ad_plain(x));
2805 }
2806 ad_adapt atan(const ad_adapt &x) { return ad_adapt(atan(ad_aug(x))); }
2807 
2808 Writer asinh(const Writer &x) {
2809  return "asinh"
2810  "(" +
2811  x + ")";
2812 }
2813 void AsinhOp::reverse(ReverseArgs<Scalar> &args) {
2814  typedef Scalar Type;
2815  if (args.dy(0) != Type(0))
2816  args.dx(0) +=
2817  args.dy(0) * Type(1.) / sqrt(args.x(0) * args.x(0) + Type(1.));
2818 }
2819 const char *AsinhOp::op_name() { return "AsinhOp"; }
2820 ad_plain asinh(const ad_plain &x) {
2821  return get_glob()->add_to_stack<AsinhOp>(x);
2822 }
2823 ad_aug asinh(const ad_aug &x) {
2824  if (x.constant())
2825  return Scalar(asinh(x.Value()));
2826  else
2827  return asinh(ad_plain(x));
2828 }
2829 ad_adapt asinh(const ad_adapt &x) { return ad_adapt(asinh(ad_aug(x))); }
2830 
2831 Writer acosh(const Writer &x) {
2832  return "acosh"
2833  "(" +
2834  x + ")";
2835 }
2836 void AcoshOp::reverse(ReverseArgs<Scalar> &args) {
2837  typedef Scalar Type;
2838  if (args.dy(0) != Type(0))
2839  args.dx(0) +=
2840  args.dy(0) * Type(1.) / sqrt(args.x(0) * args.x(0) - Type(1.));
2841 }
2842 const char *AcoshOp::op_name() { return "AcoshOp"; }
2843 ad_plain acosh(const ad_plain &x) {
2844  return get_glob()->add_to_stack<AcoshOp>(x);
2845 }
2846 ad_aug acosh(const ad_aug &x) {
2847  if (x.constant())
2848  return Scalar(acosh(x.Value()));
2849  else
2850  return acosh(ad_plain(x));
2851 }
2852 ad_adapt acosh(const ad_adapt &x) { return ad_adapt(acosh(ad_aug(x))); }
2853 
2854 Writer atanh(const Writer &x) {
2855  return "atanh"
2856  "(" +
2857  x + ")";
2858 }
2859 void AtanhOp::reverse(ReverseArgs<Scalar> &args) {
2860  typedef Scalar Type;
2861  if (args.dy(0) != Type(0))
2862  args.dx(0) += args.dy(0) * Type(1.) / (Type(1) - args.x(0) * args.x(0));
2863 }
2864 const char *AtanhOp::op_name() { return "AtanhOp"; }
2865 ad_plain atanh(const ad_plain &x) {
2866  return get_glob()->add_to_stack<AtanhOp>(x);
2867 }
2868 ad_aug atanh(const ad_aug &x) {
2869  if (x.constant())
2870  return Scalar(atanh(x.Value()));
2871  else
2872  return atanh(ad_plain(x));
2873 }
2874 ad_adapt atanh(const ad_adapt &x) { return ad_adapt(atanh(ad_aug(x))); }
2875 
2876 Writer pow(const Writer &x1, const Writer &x2) {
2877  return "pow"
2878  "(" +
2879  x1 + "," + x2 + ")";
2880 }
2881 const char *PowOp::op_name() { return "PowOp"; }
2882 ad_plain pow(const ad_plain &x1, const ad_plain &x2) {
2883  return get_glob()->add_to_stack<PowOp>(x1, x2);
2884 }
2885 ad_aug pow(const ad_aug &x1, const ad_aug &x2) {
2886  if (x1.constant() && x2.constant())
2887  return Scalar(pow(x1.Value(), x2.Value()));
2888  else
2889  return pow(ad_plain(x1), ad_plain(x2));
2890 }
2891 ad_adapt pow(const ad_adapt &x1, const ad_adapt &x2) {
2892  return ad_adapt(pow(ad_aug(x1), ad_aug(x2)));
2893 }
2894 
2895 Writer atan2(const Writer &x1, const Writer &x2) {
2896  return "atan2"
2897  "(" +
2898  x1 + "," + x2 + ")";
2899 }
2900 const char *Atan2::op_name() { return "Atan2"; }
2901 ad_plain atan2(const ad_plain &x1, const ad_plain &x2) {
2902  return get_glob()->add_to_stack<Atan2>(x1, x2);
2903 }
2904 ad_aug atan2(const ad_aug &x1, const ad_aug &x2) {
2905  if (x1.constant() && x2.constant())
2906  return Scalar(atan2(x1.Value(), x2.Value()));
2907  else
2908  return atan2(ad_plain(x1), ad_plain(x2));
2909 }
2910 ad_adapt atan2(const ad_adapt &x1, const ad_adapt &x2) {
2911  return ad_adapt(atan2(ad_aug(x1), ad_aug(x2)));
2912 }
2913 
2914 Writer max(const Writer &x1, const Writer &x2) {
2915  return "max"
2916  "(" +
2917  x1 + "," + x2 + ")";
2918 }
2919 const char *MaxOp::op_name() { return "MaxOp"; }
2920 ad_plain max(const ad_plain &x1, const ad_plain &x2) {
2921  return get_glob()->add_to_stack<MaxOp>(x1, x2);
2922 }
2923 ad_aug max(const ad_aug &x1, const ad_aug &x2) {
2924  if (x1.constant() && x2.constant())
2925  return Scalar(max(x1.Value(), x2.Value()));
2926  else
2927  return max(ad_plain(x1), ad_plain(x2));
2928 }
2929 ad_adapt max(const ad_adapt &x1, const ad_adapt &x2) {
2930  return ad_adapt(max(ad_aug(x1), ad_aug(x2)));
2931 }
2932 
2933 Writer min(const Writer &x1, const Writer &x2) {
2934  return "min"
2935  "(" +
2936  x1 + "," + x2 + ")";
2937 }
2938 const char *MinOp::op_name() { return "MinOp"; }
2939 ad_plain min(const ad_plain &x1, const ad_plain &x2) {
2940  return get_glob()->add_to_stack<MinOp>(x1, x2);
2941 }
2942 ad_aug min(const ad_aug &x1, const ad_aug &x2) {
2943  if (x1.constant() && x2.constant())
2944  return Scalar(min(x1.Value(), x2.Value()));
2945  else
2946  return min(ad_plain(x1), ad_plain(x2));
2947 }
2948 ad_adapt min(const ad_adapt &x1, const ad_adapt &x2) {
2949  return ad_adapt(min(ad_aug(x1), ad_aug(x2)));
2950 }
2951 void CondExpEqOp::forward(ForwardArgs<Scalar> &args) {
2952  if (args.x(0) == args.x(1)) {
2953  args.y(0) = args.x(2);
2954  } else {
2955  args.y(0) = args.x(3);
2956  }
2957 }
2958 void CondExpEqOp::reverse(ReverseArgs<Scalar> &args) {
2959  if (args.x(0) == args.x(1)) {
2960  args.dx(2) += args.dy(0);
2961  } else {
2962  args.dx(3) += args.dy(0);
2963  }
2964 }
2965 void CondExpEqOp::forward(ForwardArgs<Replay> &args) {
2966  args.y(0) = CondExpEq(args.x(0), args.x(1), args.x(2), args.x(3));
2967 }
2968 void CondExpEqOp::reverse(ReverseArgs<Replay> &args) {
2969  Replay zero(0);
2970  args.dx(2) += CondExpEq(args.x(0), args.x(1), args.dy(0), zero);
2971  args.dx(3) += CondExpEq(args.x(0), args.x(1), zero, args.dy(0));
2972 }
2973 void CondExpEqOp::forward(ForwardArgs<Writer> &args) {
2974  Writer w;
2975  w << "if (" << args.x(0) << "==" << args.x(1) << ") ";
2976  args.y(0) = args.x(2);
2977  w << " else ";
2978  args.y(0) = args.x(3);
2979 }
2980 void CondExpEqOp::reverse(ReverseArgs<Writer> &args) {
2981  Writer w;
2982  w << "if (" << args.x(0) << "==" << args.x(1) << ") ";
2983  args.dx(2) += args.dy(0);
2984  w << " else ";
2985  args.dx(3) += args.dy(0);
2986 }
2987 const char *CondExpEqOp::op_name() {
2988  return "CExp"
2989  "Eq";
2990 }
2991 Scalar CondExpEq(const Scalar &x0, const Scalar &x1, const Scalar &x2,
2992  const Scalar &x3) {
2993  if (x0 == x1)
2994  return x2;
2995  else
2996  return x3;
2997 }
2998 ad_plain CondExpEq(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
2999  const ad_plain &x3) {
3000  OperatorPure *pOp = get_glob()->getOperator<CondExpEqOp>();
3001  std::vector<ad_plain> x(4);
3002  x[0] = x0;
3003  x[1] = x1;
3004  x[2] = x2;
3005  x[3] = x3;
3006  std::vector<ad_plain> y = get_glob()->add_to_stack<CondExpEqOp>(pOp, x);
3007  return y[0];
3008 }
3009 ad_aug CondExpEq(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3010  const ad_aug &x3) {
3011  if (x0.constant() && x1.constant()) {
3012  if (x0.Value() == x1.Value())
3013  return x2;
3014  else
3015  return x3;
3016  } else {
3017  return CondExpEq(ad_plain(x0), ad_plain(x1), ad_plain(x2), ad_plain(x3));
3018  }
3019 }
3020 void CondExpNeOp::forward(ForwardArgs<Scalar> &args) {
3021  if (args.x(0) != args.x(1)) {
3022  args.y(0) = args.x(2);
3023  } else {
3024  args.y(0) = args.x(3);
3025  }
3026 }
3027 void CondExpNeOp::reverse(ReverseArgs<Scalar> &args) {
3028  if (args.x(0) != args.x(1)) {
3029  args.dx(2) += args.dy(0);
3030  } else {
3031  args.dx(3) += args.dy(0);
3032  }
3033 }
3034 void CondExpNeOp::forward(ForwardArgs<Replay> &args) {
3035  args.y(0) = CondExpNe(args.x(0), args.x(1), args.x(2), args.x(3));
3036 }
3037 void CondExpNeOp::reverse(ReverseArgs<Replay> &args) {
3038  Replay zero(0);
3039  args.dx(2) += CondExpNe(args.x(0), args.x(1), args.dy(0), zero);
3040  args.dx(3) += CondExpNe(args.x(0), args.x(1), zero, args.dy(0));
3041 }
3042 void CondExpNeOp::forward(ForwardArgs<Writer> &args) {
3043  Writer w;
3044  w << "if (" << args.x(0) << "!=" << args.x(1) << ") ";
3045  args.y(0) = args.x(2);
3046  w << " else ";
3047  args.y(0) = args.x(3);
3048 }
3049 void CondExpNeOp::reverse(ReverseArgs<Writer> &args) {
3050  Writer w;
3051  w << "if (" << args.x(0) << "!=" << args.x(1) << ") ";
3052  args.dx(2) += args.dy(0);
3053  w << " else ";
3054  args.dx(3) += args.dy(0);
3055 }
3056 const char *CondExpNeOp::op_name() {
3057  return "CExp"
3058  "Ne";
3059 }
3060 Scalar CondExpNe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3061  const Scalar &x3) {
3062  if (x0 != x1)
3063  return x2;
3064  else
3065  return x3;
3066 }
3067 ad_plain CondExpNe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3068  const ad_plain &x3) {
3069  OperatorPure *pOp = get_glob()->getOperator<CondExpNeOp>();
3070  std::vector<ad_plain> x(4);
3071  x[0] = x0;
3072  x[1] = x1;
3073  x[2] = x2;
3074  x[3] = x3;
3075  std::vector<ad_plain> y = get_glob()->add_to_stack<CondExpNeOp>(pOp, x);
3076  return y[0];
3077 }
3078 ad_aug CondExpNe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3079  const ad_aug &x3) {
3080  if (x0.constant() && x1.constant()) {
3081  if (x0.Value() != x1.Value())
3082  return x2;
3083  else
3084  return x3;
3085  } else {
3086  return CondExpNe(ad_plain(x0), ad_plain(x1), ad_plain(x2), ad_plain(x3));
3087  }
3088 }
3089 void CondExpGtOp::forward(ForwardArgs<Scalar> &args) {
3090  if (args.x(0) > args.x(1)) {
3091  args.y(0) = args.x(2);
3092  } else {
3093  args.y(0) = args.x(3);
3094  }
3095 }
3096 void CondExpGtOp::reverse(ReverseArgs<Scalar> &args) {
3097  if (args.x(0) > args.x(1)) {
3098  args.dx(2) += args.dy(0);
3099  } else {
3100  args.dx(3) += args.dy(0);
3101  }
3102 }
3103 void CondExpGtOp::forward(ForwardArgs<Replay> &args) {
3104  args.y(0) = CondExpGt(args.x(0), args.x(1), args.x(2), args.x(3));
3105 }
3106 void CondExpGtOp::reverse(ReverseArgs<Replay> &args) {
3107  Replay zero(0);
3108  args.dx(2) += CondExpGt(args.x(0), args.x(1), args.dy(0), zero);
3109  args.dx(3) += CondExpGt(args.x(0), args.x(1), zero, args.dy(0));
3110 }
3111 void CondExpGtOp::forward(ForwardArgs<Writer> &args) {
3112  Writer w;
3113  w << "if (" << args.x(0) << ">" << args.x(1) << ") ";
3114  args.y(0) = args.x(2);
3115  w << " else ";
3116  args.y(0) = args.x(3);
3117 }
3118 void CondExpGtOp::reverse(ReverseArgs<Writer> &args) {
3119  Writer w;
3120  w << "if (" << args.x(0) << ">" << args.x(1) << ") ";
3121  args.dx(2) += args.dy(0);
3122  w << " else ";
3123  args.dx(3) += args.dy(0);
3124 }
3125 const char *CondExpGtOp::op_name() {
3126  return "CExp"
3127  "Gt";
3128 }
3129 Scalar CondExpGt(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3130  const Scalar &x3) {
3131  if (x0 > x1)
3132  return x2;
3133  else
3134  return x3;
3135 }
3136 ad_plain CondExpGt(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3137  const ad_plain &x3) {
3138  OperatorPure *pOp = get_glob()->getOperator<CondExpGtOp>();
3139  std::vector<ad_plain> x(4);
3140  x[0] = x0;
3141  x[1] = x1;
3142  x[2] = x2;
3143  x[3] = x3;
3144  std::vector<ad_plain> y = get_glob()->add_to_stack<CondExpGtOp>(pOp, x);
3145  return y[0];
3146 }
3147 ad_aug CondExpGt(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3148  const ad_aug &x3) {
3149  if (x0.constant() && x1.constant()) {
3150  if (x0.Value() > x1.Value())
3151  return x2;
3152  else
3153  return x3;
3154  } else {
3155  return CondExpGt(ad_plain(x0), ad_plain(x1), ad_plain(x2), ad_plain(x3));
3156  }
3157 }
3158 void CondExpLtOp::forward(ForwardArgs<Scalar> &args) {
3159  if (args.x(0) < args.x(1)) {
3160  args.y(0) = args.x(2);
3161  } else {
3162  args.y(0) = args.x(3);
3163  }
3164 }
3165 void CondExpLtOp::reverse(ReverseArgs<Scalar> &args) {
3166  if (args.x(0) < args.x(1)) {
3167  args.dx(2) += args.dy(0);
3168  } else {
3169  args.dx(3) += args.dy(0);
3170  }
3171 }
3172 void CondExpLtOp::forward(ForwardArgs<Replay> &args) {
3173  args.y(0) = CondExpLt(args.x(0), args.x(1), args.x(2), args.x(3));
3174 }
3175 void CondExpLtOp::reverse(ReverseArgs<Replay> &args) {
3176  Replay zero(0);
3177  args.dx(2) += CondExpLt(args.x(0), args.x(1), args.dy(0), zero);
3178  args.dx(3) += CondExpLt(args.x(0), args.x(1), zero, args.dy(0));
3179 }
3180 void CondExpLtOp::forward(ForwardArgs<Writer> &args) {
3181  Writer w;
3182  w << "if (" << args.x(0) << "<" << args.x(1) << ") ";
3183  args.y(0) = args.x(2);
3184  w << " else ";
3185  args.y(0) = args.x(3);
3186 }
3187 void CondExpLtOp::reverse(ReverseArgs<Writer> &args) {
3188  Writer w;
3189  w << "if (" << args.x(0) << "<" << args.x(1) << ") ";
3190  args.dx(2) += args.dy(0);
3191  w << " else ";
3192  args.dx(3) += args.dy(0);
3193 }
3194 const char *CondExpLtOp::op_name() {
3195  return "CExp"
3196  "Lt";
3197 }
3198 Scalar CondExpLt(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3199  const Scalar &x3) {
3200  if (x0 < x1)
3201  return x2;
3202  else
3203  return x3;
3204 }
3205 ad_plain CondExpLt(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3206  const ad_plain &x3) {
3207  OperatorPure *pOp = get_glob()->getOperator<CondExpLtOp>();
3208  std::vector<ad_plain> x(4);
3209  x[0] = x0;
3210  x[1] = x1;
3211  x[2] = x2;
3212  x[3] = x3;
3213  std::vector<ad_plain> y = get_glob()->add_to_stack<CondExpLtOp>(pOp, x);
3214  return y[0];
3215 }
3216 ad_aug CondExpLt(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3217  const ad_aug &x3) {
3218  if (x0.constant() && x1.constant()) {
3219  if (x0.Value() < x1.Value())
3220  return x2;
3221  else
3222  return x3;
3223  } else {
3224  return CondExpLt(ad_plain(x0), ad_plain(x1), ad_plain(x2), ad_plain(x3));
3225  }
3226 }
3227 void CondExpGeOp::forward(ForwardArgs<Scalar> &args) {
3228  if (args.x(0) >= args.x(1)) {
3229  args.y(0) = args.x(2);
3230  } else {
3231  args.y(0) = args.x(3);
3232  }
3233 }
3234 void CondExpGeOp::reverse(ReverseArgs<Scalar> &args) {
3235  if (args.x(0) >= args.x(1)) {
3236  args.dx(2) += args.dy(0);
3237  } else {
3238  args.dx(3) += args.dy(0);
3239  }
3240 }
3241 void CondExpGeOp::forward(ForwardArgs<Replay> &args) {
3242  args.y(0) = CondExpGe(args.x(0), args.x(1), args.x(2), args.x(3));
3243 }
3244 void CondExpGeOp::reverse(ReverseArgs<Replay> &args) {
3245  Replay zero(0);
3246  args.dx(2) += CondExpGe(args.x(0), args.x(1), args.dy(0), zero);
3247  args.dx(3) += CondExpGe(args.x(0), args.x(1), zero, args.dy(0));
3248 }
3249 void CondExpGeOp::forward(ForwardArgs<Writer> &args) {
3250  Writer w;
3251  w << "if (" << args.x(0) << ">=" << args.x(1) << ") ";
3252  args.y(0) = args.x(2);
3253  w << " else ";
3254  args.y(0) = args.x(3);
3255 }
3256 void CondExpGeOp::reverse(ReverseArgs<Writer> &args) {
3257  Writer w;
3258  w << "if (" << args.x(0) << ">=" << args.x(1) << ") ";
3259  args.dx(2) += args.dy(0);
3260  w << " else ";
3261  args.dx(3) += args.dy(0);
3262 }
3263 const char *CondExpGeOp::op_name() {
3264  return "CExp"
3265  "Ge";
3266 }
3267 Scalar CondExpGe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3268  const Scalar &x3) {
3269  if (x0 >= x1)
3270  return x2;
3271  else
3272  return x3;
3273 }
3274 ad_plain CondExpGe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3275  const ad_plain &x3) {
3276  OperatorPure *pOp = get_glob()->getOperator<CondExpGeOp>();
3277  std::vector<ad_plain> x(4);
3278  x[0] = x0;
3279  x[1] = x1;
3280  x[2] = x2;
3281  x[3] = x3;
3282  std::vector<ad_plain> y = get_glob()->add_to_stack<CondExpGeOp>(pOp, x);
3283  return y[0];
3284 }
3285 ad_aug CondExpGe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3286  const ad_aug &x3) {
3287  if (x0.constant() && x1.constant()) {
3288  if (x0.Value() >= x1.Value())
3289  return x2;
3290  else
3291  return x3;
3292  } else {
3293  return CondExpGe(ad_plain(x0), ad_plain(x1), ad_plain(x2), ad_plain(x3));
3294  }
3295 }
3296 void CondExpLeOp::forward(ForwardArgs<Scalar> &args) {
3297  if (args.x(0) <= args.x(1)) {
3298  args.y(0) = args.x(2);
3299  } else {
3300  args.y(0) = args.x(3);
3301  }
3302 }
3303 void CondExpLeOp::reverse(ReverseArgs<Scalar> &args) {
3304  if (args.x(0) <= args.x(1)) {
3305  args.dx(2) += args.dy(0);
3306  } else {
3307  args.dx(3) += args.dy(0);
3308  }
3309 }
3310 void CondExpLeOp::forward(ForwardArgs<Replay> &args) {
3311  args.y(0) = CondExpLe(args.x(0), args.x(1), args.x(2), args.x(3));
3312 }
3313 void CondExpLeOp::reverse(ReverseArgs<Replay> &args) {
3314  Replay zero(0);
3315  args.dx(2) += CondExpLe(args.x(0), args.x(1), args.dy(0), zero);
3316  args.dx(3) += CondExpLe(args.x(0), args.x(1), zero, args.dy(0));
3317 }
3318 void CondExpLeOp::forward(ForwardArgs<Writer> &args) {
3319  Writer w;
3320  w << "if (" << args.x(0) << "<=" << args.x(1) << ") ";
3321  args.y(0) = args.x(2);
3322  w << " else ";
3323  args.y(0) = args.x(3);
3324 }
3325 void CondExpLeOp::reverse(ReverseArgs<Writer> &args) {
3326  Writer w;
3327  w << "if (" << args.x(0) << "<=" << args.x(1) << ") ";
3328  args.dx(2) += args.dy(0);
3329  w << " else ";
3330  args.dx(3) += args.dy(0);
3331 }
3332 const char *CondExpLeOp::op_name() {
3333  return "CExp"
3334  "Le";
3335 }
3336 Scalar CondExpLe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3337  const Scalar &x3) {
3338  if (x0 <= x1)
3339  return x2;
3340  else
3341  return x3;
3342 }
3343 ad_plain CondExpLe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3344  const ad_plain &x3) {
3345  OperatorPure *pOp = get_glob()->getOperator<CondExpLeOp>();
3346  std::vector<ad_plain> x(4);
3347  x[0] = x0;
3348  x[1] = x1;
3349  x[2] = x2;
3350  x[3] = x3;
3351  std::vector<ad_plain> y = get_glob()->add_to_stack<CondExpLeOp>(pOp, x);
3352  return y[0];
3353 }
3354 ad_aug CondExpLe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3355  const ad_aug &x3) {
3356  if (x0.constant() && x1.constant()) {
3357  if (x0.Value() <= x1.Value())
3358  return x2;
3359  else
3360  return x3;
3361  } else {
3362  return CondExpLe(ad_plain(x0), ad_plain(x1), ad_plain(x2), ad_plain(x3));
3363  }
3364 }
3365 
3366 Index SumOp::input_size() const { return n; }
3367 
3368 Index SumOp::output_size() const { return 1; }
3369 
3370 SumOp::SumOp(size_t n) : n(n) {}
3371 
3372 const char *SumOp::op_name() { return "SumOp"; }
3373 
// Number of inputs: the term count 'n' given at construction.
3374 Index LogSpaceSumOp::input_size() const { return this->n; }
3375 
// The operator produces a single output.
3376 Index LogSpaceSumOp::output_size() const { return 1; }
3377 
// Construct an operator over 'n' inputs.
3378 LogSpaceSumOp::LogSpaceSumOp(size_t n) : n(n) {}
3379 
3380 void LogSpaceSumOp::forward(ForwardArgs<Scalar> &args) {
3381  Scalar Max = -INFINITY;
3382  for (size_t i = 0; i < n; i++) {
3383  if (Max < args.x(i)) Max = args.x(i);
3384  }
3385  args.y(0) = 0;
3386  for (size_t i = 0; i < n; i++) {
3387  args.y(0) += exp(args.x(i) - Max);
3388  }
3389  args.y(0) = Max + log(args.y(0));
3390 }
3391 
3392 void LogSpaceSumOp::forward(ForwardArgs<Replay> &args) {
3393  std::vector<ad_plain> x(input_size());
3394  for (Index i = 0; i < input_size(); i++) x[i] = args.x(i);
3395  args.y(0) = logspace_sum(x);
3396 }
3397 
// Name reported for this operator.
3398 const char *LogSpaceSumOp::op_name() { return "LSSumOp"; }
3399 
3400 ad_plain logspace_sum(const std::vector<ad_plain> &x) {
3401  OperatorPure *pOp = get_glob()->getOperator<LogSpaceSumOp>(x.size());
3402  return get_glob()->add_to_stack<LogSpaceSumOp>(pOp, x)[0];
3403 }
3404 
// Number of terms: one per entry of 'stride'.
3405 Index LogSpaceSumStrideOp::number_of_terms() const { return stride.size(); }
3406 
// One input per term.
3407 Index LogSpaceSumStrideOp::input_size() const { return number_of_terms(); }
3408 
// The operator produces a single output.
3409 Index LogSpaceSumStrideOp::output_size() const { return 1; }
3410 
// Stores the per-term strides and the count 'n' used as the loop length in
// forward() and as the segment multiplier in dependencies().
3411 LogSpaceSumStrideOp::LogSpaceSumStrideOp(std::vector<Index> stride, size_t n)
3412  : stride(stride), n(n) {}
3413 
// Numeric forward pass: y0 = Max + log(sum_{i<n} exp(rowsum(px, i) - Max)),
// where Max is the largest rowsum(px, i) over i = 0..n-1.
// NOTE(review): rowsum() is defined elsewhere; presumably it adds up, over
// all terms j, the value at px[j][i * stride[j]] - confirm.
3414 void LogSpaceSumStrideOp::forward(ForwardArgs<Scalar> &args) {
3415  Scalar Max = -INFINITY;
3416 
// Collect one base pointer per input term.
// NOTE(review): &(wrk[0]) is taken even when m == 0 - confirm that at least
// one term is always present.
3417  size_t m = stride.size();
3418  std::vector<Scalar *> wrk(m);
3419  Scalar **px = &(wrk[0]);
3420  for (size_t i = 0; i < m; i++) {
3421  px[i] = args.x_ptr(i);
3422  }
3423 
// First pass: largest row sum.
3424  for (size_t i = 0; i < n; i++) {
3425  Scalar s = rowsum(px, i);
3426  if (Max < s) Max = s;
3427  }
3428 
// Second pass: accumulate shifted exponentials, then undo the shift.
3429  args.y(0) = 0;
3430  for (size_t i = 0; i < n; i++) {
3431  Scalar s = rowsum(px, i);
3432  args.y(0) += exp(s - Max);
3433  }
3434  args.y(0) = Max + log(args.y(0));
3435 }
3436 
3437 void LogSpaceSumStrideOp::forward(ForwardArgs<Replay> &args) {
3438  std::vector<ad_plain> x(input_size());
3439  for (Index i = 0; i < input_size(); i++) x[i] = args.x(i);
3440  args.y(0) = logspace_sum_stride(x, stride, n);
3441 }
3442 
3443 void LogSpaceSumStrideOp::dependencies(Args<> &args, Dependencies &dep) const {
3444  for (size_t j = 0; j < (size_t)number_of_terms(); j++) {
3445  size_t K = n * stride[j];
3446  dep.add_segment(args.input(j), K);
3447  }
3448 }
3449 
// Name reported for this operator.
3450 const char *LogSpaceSumStrideOp::op_name() { return "LSStride"; }
3451 
// Writer (source generating) mode is not implemented for this operator:
// reaching it trips the assertion.
3452 void LogSpaceSumStrideOp::forward(ForwardArgs<Writer> &args) {
3453  TMBAD_ASSERT(false);
3454 }
3455 
// Same as above for the reverse pass.
3456 void LogSpaceSumStrideOp::reverse(ReverseArgs<Writer> &args) {
3457  TMBAD_ASSERT(false);
3458 }
3459 
3460 ad_plain logspace_sum_stride(const std::vector<ad_plain> &x,
3461  const std::vector<Index> &stride, size_t n) {
3462  TMBAD_ASSERT(x.size() == stride.size());
3463  OperatorPure *pOp = get_glob()->getOperator<LogSpaceSumStrideOp>(stride, n);
3464  return get_glob()->add_to_stack<LogSpaceSumStrideOp>(pOp, x)[0];
3465 }
3466 } // namespace TMBad
3467 // Autogenerated - do not edit by hand !
3468 #include "graph2dot.hpp"
3469 namespace TMBad {
3470 
3471 void graph2dot(global glob, graph G, bool show_id, std::ostream &cout) {
3472  cout << "digraph graphname {\n";
3473  for (size_t i = 0; i < glob.opstack.size(); i++) {
3474  if (!show_id)
3475  cout << i << " [label=\"" << glob.opstack[i]->op_name() << "\"];\n";
3476  else
3477  cout << i << " [label=\"" << glob.opstack[i]->op_name() << " " << i
3478  << "\"];\n";
3479  }
3480  for (size_t node = 0; node < G.num_nodes(); node++) {
3481  for (size_t k = 0; k < G.num_neighbors(node); k++) {
3482  cout << node << " -> " << G.neighbors(node)[k] << ";\n";
3483  }
3484  }
3485  for (size_t i = 0; i < glob.subgraph_seq.size(); i++) {
3486  size_t node = glob.subgraph_seq[i];
3487  cout << node << " [style=\"filled\"];\n";
3488  }
3489 
3490  std::vector<Index> v2o = glob.var2op();
3491 
3492  cout << "{rank=same;";
3493  for (size_t i = 0; i < glob.inv_index.size(); i++) {
3494  cout << v2o[glob.inv_index[i]] << ";";
3495  }
3496  cout << "}\n";
3497 
3498  cout << "{rank=same;";
3499  for (size_t i = 0; i < glob.dep_index.size(); i++) {
3500  cout << v2o[glob.dep_index[i]] << ";";
3501  }
3502  cout << "}\n";
3503 
3504  cout << "}\n";
3505 }
3506 
3507 void graph2dot(global glob, bool show_id, std::ostream &cout) {
3508  graph G = glob.forward_graph();
3509  graph2dot(glob, G, show_id, cout);
3510 }
3511 
3512 void graph2dot(const char *filename, global glob, graph G, bool show_id) {
3513  std::ofstream myfile;
3514  myfile.open(filename);
3515  graph2dot(glob, G, show_id, myfile);
3516  myfile.close();
3517 }
3518 
3519 void graph2dot(const char *filename, global glob, bool show_id) {
3520  std::ofstream myfile;
3521  myfile.open(filename);
3522  graph2dot(glob, show_id, myfile);
3523  myfile.close();
3524 }
3525 } // namespace TMBad
3526 // Autogenerated - do not edit by hand !
3527 #include "graph_transform.hpp"
3528 namespace TMBad {
3529 
// Non-template convenience wrapper forwarding to which<size_t>(x).
// NOTE(review): presumably returns the positions of the true entries of 'x';
// the template is defined elsewhere - confirm.
3530 std::vector<size_t> which(const std::vector<bool> &x) {
3531  return which<size_t>(x);
3532 }
3533 
// Product of all elements of 'x'. Returns 1 for an empty vector. No overflow
// check is made.
size_t prod_int(const std::vector<size_t> &x) {
  size_t product = 1;
  for (std::vector<size_t>::const_iterator it = x.begin(); it != x.end();
       ++it) {
    product *= *it;
  }
  return product;
}
3539 
3540 std::vector<bool> reverse_boundary(global &glob,
3541  const std::vector<bool> &vars) {
3542  std::vector<bool> boundary(vars);
3543  std::vector<bool> node_filter = glob.var2op(vars);
3544  glob.reverse_sub(boundary, node_filter);
3545 
3546  for (size_t i = 0; i < vars.size(); i++) boundary[i] = boundary[i] ^ vars[i];
3547  return boundary;
3548 }
3549 
3550 std::vector<Index> get_accumulation_tree(global &glob, bool boundary) {
3551  std::vector<OperatorPure *> &opstack = glob.opstack;
3552 
3553  std::vector<bool> node_subset(opstack.size(), false);
3554  for (size_t i = 0; i < opstack.size(); i++) {
3555  node_subset[i] = opstack[i]->info().test(op_info::is_linear);
3556  }
3557 
3558  node_subset.flip();
3559 
3560  std::vector<bool> var_subset = glob.op2var(node_subset);
3561 
3562  glob.reverse(var_subset);
3563 
3564  var_subset.flip();
3565 
3566  if (boundary) var_subset = reverse_boundary(glob, var_subset);
3567 
3568  node_subset = glob.var2op(var_subset);
3569 
3570  return which<Index>(node_subset);
3571 }
3572 
3573 std::vector<Index> find_op_by_name(global &glob, const char *name) {
3574  std::vector<Index> ans;
3575  std::vector<OperatorPure *> &opstack = glob.opstack;
3576  for (size_t i = 0; i < opstack.size(); i++) {
3577  if (!strcmp(opstack[i]->op_name(), name)) {
3578  ans.push_back(i);
3579  }
3580  }
3581  return ans;
3582 }
3583 
// Replaces each operator listed in 'seq' by a pair of NullOp2 operators, so
// that its outputs become new independent variables of 'glob'.
//  inv_tags: keep the existing independent variables (the new ones are
//            appended); when false, glob.inv_index is emptied first.
//  dep_tags: keep the existing dependent variables; when false,
//            glob.dep_index is emptied.
// Returns the variable indices of the newly created independent variables.
// NOTE(review): listing line 3599 is absent from this extract; a statement
// may be missing between the loop and the op2var() call.
3584 std::vector<Index> substitute(global &glob, const std::vector<Index> &seq,
3585  bool inv_tags, bool dep_tags) {
3586  std::vector<OperatorPure *> &opstack = glob.opstack;
3587  std::vector<Index> seq2(seq);
// NOTE(review): presumably inserts one free slot in front of every listed
// operator and updates seq2 to the shifted positions - confirm.
3588  make_space_inplace(opstack, seq2);
3589  OperatorPure *invop = glob.getOperator<global::InvOp>();
3590  for (size_t i = 0; i < seq2.size(); i++) {
3591  OperatorPure *op = opstack[seq2[i]];
3592  if (inv_tags) TMBAD_ASSERT(op != invop);
3593  size_t nin = op->input_size();
3594  size_t nou = op->output_size();
// The slot before keeps the original inputs (nin inputs, no outputs); the
// operator's own slot keeps the outputs (no inputs, nou outputs).
3595  opstack[seq2[i] - 1] = glob.getOperator<global::NullOp2>(nin, 0);
3596  opstack[seq2[i]] = glob.getOperator<global::NullOp2>(0, nou);
3597  op->deallocate();
3598  }
3600  std::vector<Index> new_inv = glob.op2var(seq2);
3601  if (!inv_tags) glob.inv_index.resize(0);
3602  if (!dep_tags) glob.dep_index.resize(0);
3603  glob.inv_index.insert(glob.inv_index.end(), new_inv.begin(), new_inv.end());
3604  return new_inv;
3605 }
3606 
3607 std::vector<Index> substitute(global &glob, const char *name, bool inv_tags,
3608  bool dep_tags) {
3609  std::vector<Index> seq = find_op_by_name(glob, name);
3610  return substitute(glob, seq, inv_tags, dep_tags);
3611 }
3612 
// NOTE(review): the signature (listing line 3613) is absent from this
// extract. From the body, the function has a 'global glob' and a flag
// 'sum_' in scope and returns 'glob' - confirm against the header.
//
// Body outline:
//  1. Copy the tape and cut the copy at the boundary of the accumulation
//     tree, making the boundary variables the copy's independent variables.
//  2. Evaluate the copy once, forward and reverse, to get its value V and
//     gradient J at the current point x0, then set V <- V - J' x0.
//  3. On the original tape, replace the dependent variables by the terms
//     J[i] * boundary_i (V is added to the first term). With 'sum_' set the
//     terms are summed into one dependent variable, otherwise each term is
//     made dependent on its own.
3614  global glob_tree = glob;
3615 
3616  std::vector<Index> boundary = get_accumulation_tree(glob, true);
3617 
3618  substitute(glob_tree, boundary, false, true);
3619  glob_tree.eliminate();
3620 
3621  size_t n = glob_tree.inv_index.size();
3622 
3623  std::vector<Scalar> x0(n);
3624  for (size_t i = 0; i < n; i++) x0[i] = glob_tree.value_inv(i);
3625  glob_tree.forward();
3626  glob_tree.clear_deriv();
3627  glob_tree.deriv_dep(0) = 1;
3628  glob_tree.reverse();
3629  Scalar V = glob_tree.value_dep(0);
3630  std::vector<Scalar> J(n);
3631  for (size_t i = 0; i < n; i++) J[i] = glob_tree.deriv_inv(i);
3632 
3633  for (size_t i = 0; i < n; i++) V -= J[i] * x0[i];
3634 
3635  std::vector<Index> vars = glob.op2var(boundary);
3636  glob.dep_index.resize(0);
3637  glob.ad_start();
3638  std::vector<ad_aug_index> res(vars.begin(), vars.end());
// NOTE(review): J has n entries while this loop runs over vars.size();
// presumably the two are equal - confirm.
3639  for (size_t i = 0; i < vars.size(); i++) {
3640  res[i] = res[i] * J[i];
3641  if (i == 0) res[i] += V;
3642  if (!sum_) res[i].Dependent();
3643  }
3644  if (sum_) {
3645  ad_aug sum_res = sum(res);
3646  sum_res.Dependent();
3647  }
3648  glob.ad_stop();
3649  glob.eliminate();
3650  return glob;
3651 }
3652 
3653 void aggregate(global &glob, int sign) {
3654  TMBAD_ASSERT((sign == 1) || (sign == -1));
3655  glob.ad_start();
3656  std::vector<ad_aug_index> x(glob.dep_index.begin(), glob.dep_index.end());
3657  ad_aug y = 0;
3658  for (size_t i = 0; i < x.size(); i++) y += x[i];
3659  if (sign < 0) y = -y;
3660  glob.dep_index.resize(0);
3661  y.Dependent();
3662  glob.ad_stop();
3663 }
3664 
// Snapshots the parts of 'glob' that restore() can roll back: the list of
// dependent variables and the current operator stack size.
3665 old_state::old_state(global &glob) : glob(glob) {
3666  dep_index = glob.dep_index;
3667  opstack_size = glob.opstack.size();
3668 }
3669 
3670 void old_state::restore() {
3671  glob.dep_index = dep_index;
3672  while (glob.opstack.size() > opstack_size) {
3673  Index input_size = glob.opstack.back()->input_size();
3674  Index output_size = glob.opstack.back()->output_size();
3675  glob.inputs.resize(glob.inputs.size() - input_size);
3676  glob.values.resize(glob.values.size() - output_size);
3677  glob.opstack.back()->deallocate();
3678  glob.opstack.pop_back();
3679  }
3680 }
3681 
// Binds to 'glob'; runs initialize() immediately when 'do_init' is true,
// otherwise the caller is expected to call initialize() later.
3682 term_info::term_info(global &glob, bool do_init) : glob(glob) {
3683  if (do_init) initialize();
3684 }
3685 
3686 void term_info::initialize(std::vector<Index> inv_remap) {
3687  if (inv_remap.size() == 0) inv_remap.resize(glob.inv_index.size(), 0);
3688  inv_remap = radix::factor<Index>(inv_remap);
3689  std::vector<Index> remap = remap_identical_sub_expressions(glob, inv_remap);
3690  std::vector<Index> term_ids = subset(remap, glob.dep_index);
3691  id = radix::factor<Index>(term_ids);
3692  Index max_id = *std::max_element(id.begin(), id.end());
3693  count.resize(max_id + 1, 0);
3694  for (size_t i = 0; i < id.size(); i++) {
3695  count[id[i]]++;
3696  }
3697 }
3698 
// Default configuration: debug off, adaptive off, nan2zero on, ytol = 1e-2,
// dx = 1.
3699 gk_config::gk_config()
3700  : debug(false), adaptive(false), nan2zero(true), ytol(1e-2), dx(1) {}
3701 
// NOTE(review): the signature (listing line 3702) is absent from this
// extract; clique::get_stride() calls mv.count(), so this is presumably
// size_t multivariate_index::count() - confirm.
// Returns the product of bound[i] over the dimensions whose mask is set
// (1 when none is set).
3703  size_t count = 1;
3704  for (size_t i = 0; i < bound.size(); i++)
3705  if (mask_[i]) count *= bound[i];
3706  return count;
3707 }
3708 
// Index over 'dim' dimensions that all share the same bound 'bound_'.
// The position starts at zero; every dimension's mask is set to 'flag'.
3709 multivariate_index::multivariate_index(size_t bound_, size_t dim, bool flag)
3710  : pointer(0) {
3711  bound.resize(dim, bound_);
3712  x.resize(dim, 0);
3713  mask_.resize(dim, flag);
3714 }
3715 
// Index with one bound per dimension. The position starts at zero; every
// dimension's mask is set to 'flag'.
3716 multivariate_index::multivariate_index(std::vector<size_t> bound, bool flag)
3717  : pointer(0), bound(bound) {
3718  x.resize(bound.size(), 0);
3719  mask_.resize(bound.size(), flag);
3720 }
3721 
// Inverts the mask of every dimension.
3722 void multivariate_index::flip() { mask_.flip(); }
3723 
// NOTE(review): the signature (listing line 3724) is absent from this
// extract; callers use '++mv' and the body returns *this, so this is
// presumably multivariate_index &multivariate_index::operator++() - confirm.
//
// Advances the masked dimensions like an odometer, first dimension fastest,
// and keeps the flat offset 'pointer' in sync. N is the flat stride of
// dimension i (product of the bounds of all earlier dimensions, masked or
// not). When every masked dimension is at its last value they all wrap to 0.
// NOTE(review): 'bound[i] - 1' wraps around for a masked dimension with
// bound 0 - confirm that bounds are always >= 1.
3725  size_t N = 1;
3726  for (size_t i = 0; i < x.size(); i++) {
3727  if (mask_[i]) {
3728  if (x[i] < bound[i] - 1) {
3729  x[i]++;
3730  pointer += N;
3731  break;
3732  } else {
3733  x[i] = 0;
3734  pointer -= (bound[i] - 1) * N;
3735  }
3736  }
3737  N *= bound[i];
3738  }
3739  return *this;
3740 }
3741 
// Conversion to the current flat offset.
3742 multivariate_index::operator size_t() { return pointer; }
3743 
// Current coordinate in dimension i.
3744 size_t multivariate_index::index(size_t i) { return x[i]; }
3745 
// Copy of all current coordinates.
3746 std::vector<size_t> multivariate_index::index() { return x; }
3747 
// Writable reference to the mask of dimension i.
3748 std::vector<bool>::reference multivariate_index::mask(size_t i) {
3749  return mask_[i];
3750 }
3751 
// Replaces the whole mask; 'mask' must have one entry per dimension
// (asserted).
3752 void multivariate_index::set_mask(const std::vector<bool> &mask) {
3753  TMBAD_ASSERT(mask.size() == mask_.size());
3754  mask_ = mask;
3755 }
3756 
// Number of indices in the clique.
3757 size_t clique::clique_size() { return indices.size(); }
3758 
// Empty clique.
3759 clique::clique() {}
3760 
// Keeps only the entries of 'indices' and 'dim' selected by 'mask'.
3761 void clique::subset_inplace(const std::vector<bool> &mask) {
3762  indices = subset(indices, mask);
3763  dim = subset(dim, mask);
3764 }
3765 
// Sizes 'logsum' to hold one value per grid point, i.e. prod(dim) entries.
3766 void clique::logsum_init() { logsum.resize(prod_int(dim)); }
3767 
// True when the clique has no indices.
3768 bool clique::empty() const { return (indices.size() == 0); }
3769 
3770 bool clique::contains(Index i) {
3771  bool ans = false;
3772  for (size_t j = 0; j < indices.size(); j++) ans |= (i == indices[j]);
3773  return ans;
3774 }
3775 
// Prepares this clique's 'logsum' table for summation over index 'ind'
// within the larger clique 'super'.
//  super : clique whose indices/dims define the combined grid
//  ind   : the index to be summed out
//  offset: (output) one ad_plain per combination of the super indices other
//          than 'ind'
//  stride: (output) product of dim[k] over this clique's leading indices
//          that are smaller than 'ind'
// NOTE(review): the loop computing 'stride' stops at the first index that is
// not < ind, so it presumably relies on 'indices' being sorted - confirm.
3776 void clique::get_stride(const clique &super, Index ind,
3777  std::vector<ad_plain> &offset, Index &stride) {
3778  stride = 1;
3779  for (size_t k = 0; (k < clique_size()) && (indices[k] < ind); k++) {
3780  stride *= dim[k];
3781  }
3782 
// Expand 'logsum' onto the full grid of 'super': 'mask' selects the super
// indices that are NOT in this clique (outer loop); flipping selects the
// ones that are (inner loop), over which logsum[j] is enumerated.
3783  multivariate_index mv(super.dim);
3784  size_t nx = mv.count();
3785  std::vector<bool> mask = lmatch(super.indices, this->indices);
3786  mask.flip();
3787  mv.set_mask(mask);
3788  std::vector<ad_plain> x(nx);
3789  size_t xa_count = mv.count();
3790  mv.flip();
3791  size_t xi_count = mv.count();
3792  mv.flip();
3793  TMBAD_ASSERT(x.size() == xa_count * xi_count);
3794  for (size_t i = 0; i < xa_count; i++, ++mv) {
3795  mv.flip();
3796  for (size_t j = 0; j < xi_count; j++, ++mv) {
3797  TMBAD_ASSERT(logsum[j].on_some_tape());
3798  x[mv] = logsum[j];
3799  }
3800  mv.flip();
3801  }
3802 
// Walk all super indices except 'ind' and pick the expanded entry at each
// position (the coordinate of 'ind' stays at 0).
3803  mv = multivariate_index(super.dim);
3804  mask = lmatch(super.indices, std::vector<Index>(1, ind));
3805  mask.flip();
3806  mv.set_mask(mask);
3807 
3808  xa_count = mv.count();
3809  offset.resize(xa_count);
3810  for (size_t i = 0; i < xa_count; i++, ++mv) {
3811  offset[i] = x[mv];
3812  }
3813 }
3814 
// Empty grid (no points, no weights).
3815 sr_grid::sr_grid() {}
3816 
3817 sr_grid::sr_grid(Scalar a, Scalar b, size_t n) : x(n), w(n) {
3818  Scalar h = (b - a) / n;
3819  for (size_t i = 0; i < n; i++) {
3820  x[i] = a + h / 2 + i * h;
3821  w[i] = h;
3822  }
3823 }
3824 
3825 sr_grid::sr_grid(size_t n) {
3826  for (size_t i = 0; i < n; i++) {
3827  x[i] = i;
3828  w[i] = 1. / (double)n;
3829  }
3830 }
3831 
// Number of grid points.
3832 size_t sr_grid::size() { return x.size(); }
3833 
// Returns the first element of the taped log-weights, creating them on first
// use: 'logw' is (re)built whenever its size differs from that of 'w'.
// NOTE(review): forceContiguous() presumably lays the elements out next to
// each other on the tape so that logw[0] can serve as a base address for
// strided access - confirm.
// NOTE(review): logw[0] is read even when 'w' is empty - confirm that an
// empty grid never reaches this call.
3834 ad_plain sr_grid::logw_offset() {
3835  if (logw.size() != w.size()) {
3836  logw.resize(w.size());
3837  for (size_t i = 0; i < w.size(); i++) logw[i] = log(w[i]);
3838  forceContiguous(logw);
3839  }
3840  return logw[0];
3841 }
3842 
// NOTE(review): the first signature line (listing line 3843) is absent from
// this extract; from the initializer list this is the sequential_reduction
// constructor whose first parameter is the tape 'glob' - confirm.
//  random     : positions (into glob.inv_index) of the variables to
//               integrate out
//  grid       : available integration grids
//  random2grid: for each entry of 'random', the index into 'grid' to use
//  perm       : when true, reorder 'random' via reorder_random()
3844  std::vector<Index> random,
3845  std::vector<sr_grid> grid,
3846  std::vector<Index> random2grid,
3847  bool perm)
3848  : grid(grid),
3849  glob(glob),
3850  random(random),
3851  replay(glob, new_glob),
3852  tinfo(glob, false) {
// Grid lookup per independent variable (grid 0 unless listed in random2grid).
3853  inv2grid.resize(glob.inv_index.size(), 0);
3854  for (size_t i = 0; i < random2grid.size(); i++) {
3855  inv2grid[random[i]] = random2grid[i];
3856  }
3857 
// Mark the random effects and propagate the marks forward.
3858  mark.resize(glob.values.size(), false);
3859  for (size_t i = 0; i < random.size(); i++)
3860  mark[glob.inv_index[random[i]]] = true;
3861  glob.forward(mark);
3862 
3863  forward_graph = glob.forward_graph(mark);
3864  reverse_graph = glob.reverse_graph(mark);
3865 
3866  glob.subgraph_cache_ptr();
3867 
3868  var_remap.resize(glob.values.size());
3869 
// Operator -> position lookups, NA where the operator is not an independent
// (resp. dependent) variable.
3870  op2inv_idx = glob.op2idx(glob.inv_index, NA);
3871  op2dep_idx = glob.op2idx(glob.dep_index, NA);
3872 
3873  if (perm) reorder_random();
3874 
3875  terms_done.resize(glob.dep_index.size(), false);
3876 
// Labels for term_info: every non-random variable gets its own label
// -(i + 1); random effects are labelled by their grid, so effects sharing a
// grid may be treated as interchangeable.
// NOTE(review): if Index is unsigned, -(i + 1) wraps to large values that
// factor() then renumbers - confirm.
3877  std::vector<Index> inv_remap(glob.inv_index.size());
3878  for (size_t i = 0; i < inv_remap.size(); i++) inv_remap[i] = -(i + 1);
3879  for (size_t i = 0; i < random.size(); i++)
3880  inv_remap[random[i]] = inv2grid[random[i]];
3881  inv_remap = radix::factor<Index>(inv_remap);
3882  tinfo.initialize(inv_remap);
3883 }
3884 
// NOTE(review): the signature (listing line 3885) is absent from this
// extract; the constructor calls reorder_random(), so this is presumably
// void sequential_reduction::reorder_random() - confirm.
//
// Reorders 'random':
//  1. For every random effect, collect the independent variables found by
//     searching forward and then backward from it, and add an edge from the
//     effect to each of them.
//  2. Traverse the resulting graph from each not yet visited random effect,
//     concatenate the visit orders and reverse the result.
3886  std::vector<IndexPair> edges;
3887  std::vector<Index> &inv2op = forward_graph.inv2op;
3888 
3889  for (size_t i = 0; i < random.size(); i++) {
3890  std::vector<Index> subgraph(1, inv2op[random[i]]);
3891  forward_graph.search(subgraph);
3892  reverse_graph.search(subgraph);
3893  for (size_t l = 0; l < subgraph.size(); l++) {
3894  Index inv_other = op2inv_idx[subgraph[l]];
3895  if (inv_other != NA) {
3896  IndexPair edge(random[i], inv_other);
3897  edges.push_back(edge);
3898  }
3899  }
3900  }
3901 
3902  size_t num_nodes = glob.inv_index.size();
3903  graph G(num_nodes, edges);
3904 
3905  std::vector<bool> visited(num_nodes, false);
3906  std::vector<Index> subgraph;
3907  for (size_t i = 0; i < random.size(); i++) {
3908  if (visited[random[i]]) continue;
3909  std::vector<Index> sg(1, random[i]);
3910  G.search(sg, visited, false, false);
3911  subgraph.insert(subgraph.end(), sg.begin(), sg.end());
3912  }
3913  std::reverse(subgraph.begin(), subgraph.end());
// The traversal must have found exactly the random effects.
3914  TMBAD_ASSERT(random.size() == subgraph.size());
3915  random = subgraph;
3916 }
3917 
3918 std::vector<size_t> sequential_reduction::get_grid_bounds(
3919  std::vector<Index> inv_index) {
3920  std::vector<size_t> ans(inv_index.size());
3921  for (size_t i = 0; i < inv_index.size(); i++) {
3922  ans[i] = grid[inv2grid[inv_index[i]]].size();
3923  }
3924  return ans;
3925 }
3926 
3927 std::vector<sr_grid *> sequential_reduction::get_grid(
3928  std::vector<Index> inv_index) {
3929  std::vector<sr_grid *> ans(inv_index.size());
3930  for (size_t i = 0; i < inv_index.size(); i++) {
3931  ans[i] = &(grid[inv2grid[inv_index[i]]]);
3932  }
3933  return ans;
3934 }
3935 
// Evaluates dependent variable 'dep_index' on the full grid spanned by the
// independent variables 'inv_index' and returns the table of values, first
// variable varying fastest.
// Terms whose equivalence class (tinfo.id) occurs at least twice are cached
// by class id, so the table is computed once per class.
3936 std::vector<ad_aug> sequential_reduction::tabulate(std::vector<Index> inv_index,
3937  Index dep_index) {
3938  size_t id = tinfo.id[dep_index];
3939  size_t count = tinfo.count[id];
3940  bool do_cache = (count >= 2);
3941  if (do_cache) {
3942  if (cache[id].size() > 0) {
3943  return cache[id];
3944  }
3945  }
3946 
3947  std::vector<sr_grid *> inv_grid = get_grid(inv_index);
3948  std::vector<size_t> grid_bounds = get_grid_bounds(inv_index);
3949  multivariate_index mv(grid_bounds);
3950  std::vector<ad_aug> ans(mv.count());
3951  for (size_t i = 0; i < ans.size(); i++, ++mv) {
// Set every involved variable to its current grid point, then re-evaluate.
// NOTE(review): forward_sub() presumably evaluates only the operators in
// glob.subgraph_seq, which the caller sets beforehand - confirm.
3952  for (size_t j = 0; j < inv_index.size(); j++) {
3953  replay.value_inv(inv_index[j]) = inv_grid[j]->x[mv.index(j)];
3954  }
3955  replay.forward_sub();
3956  ans[i] = replay.value_dep(dep_index);
3957  }
3958 
3959  forceContiguous(ans);
3960  if (do_cache) {
3961  cache[id] = ans;
3962  }
3963  return ans;
3964 }
3965 
// NOTE(review): the signature (listing line 3966) is absent from this
// extract; the body uses an index 'i' and the caller invokes merge(i), so
// this is presumably void sequential_reduction::merge(Index i) - confirm.
//
// Sums variable 'i' out of all cliques that contain it:
//  1. build the union 'super' of the indices of those cliques,
//  2. get each clique's offsets and stride relative to 'super', and remove
//     the clique from the list,
//  3. create a new clique over super \ {i} with one logspace_sum_stride
//     entry per grid point, adding the log grid weights as an extra term,
//  4. append the new clique to the list.
3967  std::vector<Index> super;
3968  size_t c = 0;
3969  for (std::list<clique>::iterator it = cliques.begin(); it != cliques.end();
3970  ++it) {
3971  if ((*it).contains(i)) {
3972  super.insert(super.end(), (*it).indices.begin(), (*it).indices.end());
3973  c++;
3974  }
3975  }
3976  sort_unique_inplace(super);
3977 
3978  std::vector<std::vector<ad_plain> > offset_by_clique(c);
3979  std::vector<Index> stride_by_clique(c);
3980  clique C;
3981  C.indices = super;
3982  C.dim = get_grid_bounds(super);
3983  std::list<clique>::iterator it = cliques.begin();
3984  c = 0;
3985  while (it != cliques.end()) {
3986  if ((*it).contains(i)) {
3987  (*it).get_stride(C, i, offset_by_clique[c], stride_by_clique[c]);
// erase() returns the iterator following the removed element.
3988  it = cliques.erase(it);
3989  c++;
3990  } else {
3991  ++it;
3992  }
3993  }
3994 
// Drop 'i' from the merged clique and size its table.
3995  std::vector<bool> mask = lmatch(super, std::vector<Index>(1, i));
3996  mask.flip();
3997  C.subset_inplace(mask);
3998  C.logsum_init();
3999 
// Make sure the log-weights are on the tape before v_begin is recorded, so
// that the loop below adds exactly one value per table entry.
4000  grid[inv2grid[i]].logw_offset();
4001  size_t v_begin = get_glob()->values.size();
4002  for (size_t j = 0; j < C.logsum.size(); j++) {
4003  std::vector<ad_plain> x;
4004  std::vector<Index> stride;
4005  for (size_t k = 0; k < offset_by_clique.size(); k++) {
4006  x.push_back(offset_by_clique[k][j]);
4007  stride.push_back(stride_by_clique[k]);
4008  }
4009 
4010  x.push_back(grid[inv2grid[i]].logw_offset());
4011  stride.push_back(1);
4012  C.logsum[j] = logspace_sum_stride(x, stride, grid[inv2grid[i]].size());
4013  }
4014  size_t v_end = get_glob()->values.size();
4015  TMBAD_ASSERT(v_end - v_begin == C.logsum.size());
4016 
4017  cliques.push_back(C);
4018 }
4019 
// NOTE(review): the signature (listing line 4020) is absent from this
// extract; update_all() calls update(random[i]), so this is presumably
// void sequential_reduction::update(Index i) - confirm.
//
// Integrates out independent variable 'i':
//  1. find every not yet processed term (dependent variable) reachable
//     forward from 'i' and mark it done,
//  2. for each such term, find the independent variables it depends on,
//     tabulate the term on their grid and add the table as a clique,
//  3. merge all cliques containing 'i'.
4021  const std::vector<Index> &inv2op = forward_graph.inv2op;
4022 
4023  Index start_node = inv2op[i];
4024  std::vector<Index> subgraph(1, start_node);
4025  forward_graph.search(subgraph);
4026 
4027  std::vector<Index> dep_clique;
4028  std::vector<Index> subgraph_terms;
4029  for (size_t k = 0; k < subgraph.size(); k++) {
4030  Index node = subgraph[k];
4031  Index dep_idx = op2dep_idx[node];
4032  if (dep_idx != NA && !terms_done[dep_idx]) {
4033  terms_done[dep_idx] = true;
4034  subgraph_terms.push_back(node);
4035  dep_clique.push_back(dep_idx);
4036  }
4037  }
4038  for (size_t k = 0; k < subgraph_terms.size(); k++) {
4039  subgraph.resize(0);
4040  subgraph.push_back(subgraph_terms[k]);
4041 
4042  reverse_graph.search(subgraph);
4043 
4044  std::vector<Index> inv_clique;
4045  for (size_t l = 0; l < subgraph.size(); l++) {
4046  Index tmp = op2inv_idx[subgraph[l]];
4047  if (tmp != NA) inv_clique.push_back(tmp);
4048  }
4049 
// Set before tabulate() is called.
4050  glob.subgraph_seq = subgraph;
4051 
4052  clique C;
4053  C.indices = inv_clique;
4054  C.dim = get_grid_bounds(inv_clique);
4055  C.logsum = tabulate(inv_clique, dep_clique[k]);
4056 
4057  cliques.push_back(C);
4058  }
4059 
4060  merge(i);
4061 }
4062 
4063 void sequential_reduction::show_cliques() {
4064  Rcout << "Cliques: ";
4065  std::list<clique>::iterator it;
4066  for (it = cliques.begin(); it != cliques.end(); ++it) {
4067  Rcout << it->indices << " ";
4068  }
4069  Rcout << "\n";
4070 }
4071 
4072 void sequential_reduction::update_all() {
4073  for (size_t i = 0; i < random.size(); i++) update(random[i]);
4074 }
4075 
4076 ad_aug sequential_reduction::get_result() {
4077  ad_aug ans = 0;
4078  std::list<clique>::iterator it;
4079  for (it = cliques.begin(); it != cliques.end(); ++it) {
4080  TMBAD_ASSERT(it->clique_size() == 0);
4081  TMBAD_ASSERT(it->logsum.size() == 1);
4082  ans += it->logsum[0];
4083  }
4084 
4085  for (size_t i = 0; i < terms_done.size(); i++) {
4086  if (!terms_done[i]) ans += replay.value_dep(i);
4087  }
4088  return ans;
4089 }
4090 
// Runs the whole reduction and returns the new tape: starts the replay,
// replays the original tape, integrates out all random effects, registers
// the total as the dependent variable, stops the replay and returns
// 'new_glob'.
// NOTE(review): the meaning of the two flags passed to replay.forward() is
// not visible here - confirm against the replay class.
4091 global sequential_reduction::marginal() {
4092  replay.start();
4093  replay.forward(true, false);
4094  update_all();
4095  ad_aug ans = get_result();
4096  ans.Dependent();
4097  replay.stop();
4098  return new_glob;
4099 }
4100 
// Binds to 'glob', stores the requested number of threads, switches both
// options (do_aggregate, keep_all_inv) off and builds the reverse graph of
// the tape.
4101 autopar::autopar(global &glob, size_t num_threads)
4102  : glob(glob),
4103  num_threads(num_threads),
4104  do_aggregate(false),
4105  keep_all_inv(false) {
4106  reverse_graph = glob.reverse_graph();
4107 }
4108 
4109 std::vector<size_t> autopar::max_tree_depth() {
4110  std::vector<Index> max_tree_depth(glob.opstack.size(), 0);
4111  Dependencies dep;
4112  Args<> args(glob.inputs);
4113  for (size_t i = 0; i < glob.opstack.size(); i++) {
4114  dep.resize(0);
4115  glob.opstack[i]->dependencies(args, dep);
4116  for (size_t j = 0; j < dep.size(); j++) {
4117  max_tree_depth[i] = std::max(max_tree_depth[i], max_tree_depth[dep[j]]);
4118  }
4119 
4120  max_tree_depth[i]++;
4121 
4122  glob.opstack[i]->increment(args.ptr);
4123  }
4124  std::vector<size_t> ans(glob.dep_index.size());
4125  for (size_t j = 0; j < glob.dep_index.size(); j++) {
4126  ans[j] = max_tree_depth[glob.dep_index[j]];
4127  }
4128  return ans;
4129 }
4130 
// Splits the dependent variables of the tape across 'num_threads' threads
// and stores, per thread, the operators it needs in 'node_split'.
//  1. Visit the dependent variables by decreasing tree depth and record,
//     for each, how many operators it needs that no earlier one has already
//     claimed (dWork).
//  2. Assign greedily: a term adding at most one new operator goes to the
//     same thread as the previous term, any other term goes to the thread
//     with the least accumulated work.
//  3. Expand each thread's list of output operators to the full set of
//     operators it depends on.
// NOTE(review): which_min() is called on a vector of length num_threads -
// confirm that num_threads >= 1 is guaranteed by the callers.
4131 void autopar::run() {
4132  std::vector<size_t> ord = order(max_tree_depth());
4133  std::reverse(ord.begin(), ord.end());
4134  std::vector<bool> visited(glob.opstack.size(), false);
4135  std::vector<Index> start;
4136  std::vector<Index> dWork(ord.size());
4137  for (size_t i = 0; i < ord.size(); i++) {
4138  start.resize(1);
4139  start[0] = reverse_graph.dep2op[ord[i]];
4140  reverse_graph.search(start, visited, false, false);
4141  dWork[i] = start.size();
// Disabled debug output.
4142  if (false) {
4143  for (size_t k = 0; k < start.size(); k++) {
4144  Rcout << glob.opstack[start[k]]->op_name() << " ";
4145  }
4146  Rcout << "\n";
4147  }
4148  }
4149 
4150  std::vector<size_t> thread_assign(ord.size(), 0);
4151  std::vector<size_t> work_by_thread(num_threads, 0);
4152  for (size_t i = 0; i < dWork.size(); i++) {
4153  if (i == 0) {
4154  thread_assign[i] = 0;
4155  } else {
4156  if (dWork[i] <= 1)
4157  thread_assign[i] = thread_assign[i - 1];
4158  else
4159  thread_assign[i] = which_min(work_by_thread);
4160  }
4161  work_by_thread[thread_assign[i]] += dWork[i];
4162  }
4163 
4164  node_split.resize(num_threads);
4165  for (size_t i = 0; i < ord.size(); i++) {
4166  node_split[thread_assign[i]].push_back(reverse_graph.dep2op[ord[i]]);
4167  }
4168 
4169  for (size_t i = 0; i < num_threads; i++) {
// Optionally give every thread all independent variables.
4170  if (keep_all_inv)
4171  node_split[i].insert(node_split[i].begin(), reverse_graph.inv2op.begin(),
4172  reverse_graph.inv2op.end());
4173  reverse_graph.search(node_split[i]);
4174  }
4175 }
4176 
// NOTE(review): the signature (listing line 4177) is absent from this
// extract; presumably void autopar::extract() - confirm.
//
// Builds one sub-tape per thread from 'node_split' (optionally aggregating
// each sub-tape's outputs into one) and records, per thread, which of the
// original independent / dependent variables it uses (inv_idx / dep_idx).
4178  vglob.resize(num_threads);
4179  inv_idx.resize(num_threads);
4180  dep_idx.resize(num_threads);
4181  std::vector<Index> tmp;
4182  for (size_t i = 0; i < num_threads; i++) {
4183  glob.subgraph_seq = node_split[i];
4184  vglob[i] = glob.extract_sub(tmp);
4185  if (do_aggregate) aggregate(vglob[i]);
4186  }
4187 
// NOTE(review): NA = -1 relies on wrap-around if Index is unsigned - confirm.
4188  Index NA = -1;
4189  std::vector<Index> op2inv_idx = glob.op2idx(glob.inv_index, NA);
4190  std::vector<Index> op2dep_idx = glob.op2idx(glob.dep_index, NA);
4191  for (size_t i = 0; i < num_threads; i++) {
4192  std::vector<Index> &seq = node_split[i];
4193  for (size_t j = 0; j < seq.size(); j++) {
4194  if (op2inv_idx[seq[j]] != NA) inv_idx[i].push_back(op2inv_idx[seq[j]]);
4195  if (op2dep_idx[seq[j]] != NA) dep_idx[i].push_back(op2dep_idx[seq[j]]);
4196  }
// With aggregation each thread has exactly one output, stored at position i.
4197  if (do_aggregate) {
4198  dep_idx[i].resize(1);
4199  dep_idx[i][0] = i;
4200  }
4201  }
4202 }
4203 
// Number of independent variables of the original tape.
4204 size_t autopar::input_size() const { return glob.inv_index.size(); }
4205 
// Number of outputs: one per thread when aggregating, otherwise the number
// of dependent variables of the original tape.
4206 size_t autopar::output_size() const {
4207  return (do_aggregate ? num_threads : glob.dep_index.size());
4208 }
4209 
// Number of inputs (taken from the autopar object at construction).
4210 Index ParalOp::input_size() const { return n; }
4211 
// Number of outputs (taken from the autopar object at construction).
4212 Index ParalOp::output_size() const { return m; }
4213 
// Copies the per-thread tapes and index maps out of 'ap' and caches its
// input and output sizes.
4214 ParalOp::ParalOp(const autopar &ap)
4215  : vglob(ap.vglob),
4216  inv_idx(ap.inv_idx),
4217  dep_idx(ap.dep_idx),
4218  n(ap.input_size()),
4219  m(ap.output_size()) {}
4220 
// Numeric forward pass: every per-thread tape receives its inputs and is
// evaluated (in parallel when compiled with OpenMP); the outputs are then
// copied back in a separate serial loop.
4221 void ParalOp::forward(ForwardArgs<Scalar> &args) {
4222  size_t num_threads = vglob.size();
4223 
// The pragma applies to the loop that follows.
4224 #ifdef _OPENMP
4225 #pragma omp parallel for
4226 #endif
4227 
4228  for (size_t i = 0; i < num_threads; i++) {
4229  for (size_t j = 0; j < inv_idx[i].size(); j++) {
4230  vglob[i].value_inv(j) = args.x(inv_idx[i][j]);
4231  }
4232  vglob[i].forward();
4233  }
4234 
4235  for (size_t i = 0; i < num_threads; i++) {
4236  for (size_t j = 0; j < dep_idx[i].size(); j++) {
4237  args.y(dep_idx[i][j]) = vglob[i].value_dep(j);
4238  }
4239  }
4240 }
4241 
// Numeric reverse pass: every per-thread tape is seeded with the adjoints of
// its outputs and swept backwards (in parallel when compiled with OpenMP);
// the input adjoints are then accumulated in a separate serial loop.
4242 void ParalOp::reverse(ReverseArgs<Scalar> &args) {
4243  size_t num_threads = vglob.size();
4244 
// The pragma applies to the loop that follows.
4245 #ifdef _OPENMP
4246 #pragma omp parallel for
4247 #endif
4248 
4249  for (size_t i = 0; i < num_threads; i++) {
4250  vglob[i].clear_deriv();
4251  for (size_t j = 0; j < dep_idx[i].size(); j++) {
4252  vglob[i].deriv_dep(j) = args.dy(dep_idx[i][j]);
4253  }
4254  vglob[i].reverse();
4255  }
4256 
// Several threads may contribute to the same input, hence '+='.
4257  for (size_t i = 0; i < num_threads; i++) {
4258  for (size_t j = 0; j < inv_idx[i].size(); j++) {
4259  args.dx(inv_idx[i][j]) += vglob[i].deriv_inv(j);
4260  }
4261  }
4262 }
4263 
// Name reported for this operator.
4264 const char *ParalOp::op_name() { return "ParalOp"; }
4265 
4266 void ParalOp::print(global::print_config cfg) {
4267  size_t num_threads = vglob.size();
4268  for (size_t i = 0; i < num_threads; i++) {
4269  global::print_config cfg2 = cfg;
4270  std::stringstream ss;
4271  ss << i;
4272  std::string str = ss.str();
4273  cfg2.prefix = cfg2.prefix + str;
4274  vglob[i].print(cfg2);
4275  }
4276 }
4277 
// Hashes every variable of the tape (seeding the independent variables with
// 'inv_remap') and returns, for each variable, the index of the first
// variable with the same hash.
// Equal hashes only make a duplicate likely; the candidates are verified by
// remap_identical_sub_expressions().
4278 std::vector<Index> get_likely_expression_duplicates(
4279  const global &glob, std::vector<Index> inv_remap) {
4280  global::hash_config cfg;
4281  cfg.strong_inv = true;
4282  cfg.strong_const = true;
4283  cfg.strong_output = true;
4284  cfg.reduce = false;
4285  cfg.deterministic = false;
4286  cfg.inv_seed = inv_remap;
4287  std::vector<hash_t> h = glob.hash_sweep(cfg);
4288  return radix::first_occurance<Index>(h);
4289 }
4290 
4291 bool all_allow_remap(const global &glob) {
4292  Args<> args(glob.inputs);
4293  for (size_t i = 0; i < glob.opstack.size(); i++) {
4294  op_info info = glob.opstack[i]->info();
4295  if (!info.test(op_info::allow_remap)) {
4296  return false;
4297  }
4298  glob.opstack[i]->increment(args.ptr);
4299  }
4300  return true;
4301 }
4302 
// NOTE(review): the first signature line (listing line 4303) is absent from
// this extract; term_info::initialize() assigns the result of
// remap_identical_sub_expressions(glob, inv_remap) to a std::vector<Index>,
// so this is presumably that function - confirm.
//
// Returns, for every variable, the index of an earlier variable holding an
// identical expression (or its own index if there is none).
// Candidates come from hashing and are then verified; rejected candidates
// are reset to the identity. Variables read by operators that do not allow
// remapping are handed to forbid_remap at the end.
4304  global &glob, std::vector<Index> inv_remap) {
4305  std::vector<Index> remap = get_likely_expression_duplicates(glob, inv_remap);
4306 
// Independent variables: accept a remap only if requested through
// 'inv_remap' and the hashes agree; otherwise force the identity.
4307  for (size_t i = 0; i < glob.inv_index.size(); i++) {
4308  bool accept = false;
4309  Index var_i = glob.inv_index[i];
4310  if (inv_remap.size() > 0) {
4311  Index j = inv_remap[i];
4312  Index var_j = glob.inv_index[j];
4313  accept = remap[var_i] == remap[var_j];
4314  }
4315  if (!accept) remap[var_i] = var_i;
4316  }
4317 
4318  std::vector<Index> v2o = glob.var2op();
4319  std::vector<Index> dep;
4320  global::OperatorPure *invop = glob.getOperator<global::InvOp>();
4321  Dependencies dep1;
4322  Dependencies dep2;
4323  size_t reject = 0;
4324  size_t total = 0;
4325  Args<> args(glob.inputs);
4326 
// j runs over operators, i over the first output variable of operator j.
4327  for (size_t j = 0, i = 0, nout = 0; j < glob.opstack.size(); j++, i += nout) {
4328  nout = glob.opstack[j]->output_size();
4329  bool any_remap = false;
4330  for (size_t k = i; k < i + nout; k++) {
4331  if (remap[k] != k) {
4332  any_remap = true;
4333  break;
4334  }
4335  }
4336  if (any_remap) {
4337  bool ok = true;
4338  total += nout;
4339 
// Both operators must be of the same kind and size.
4340  global::OperatorPure *CurOp = glob.opstack[v2o[i]];
4341  global::OperatorPure *RemOp = glob.opstack[v2o[remap[i]]];
4342  ok &= (CurOp->identifier() == RemOp->identifier());
4343 
4344  ok &= (CurOp->input_size() == RemOp->input_size());
4345  ok &= (CurOp->output_size() == RemOp->output_size());
4346 
4347  op_info CurInfo = CurOp->info();
4348 
// Multi-output operators: all outputs must map, in order, to the outputs of
// one single earlier operator.
4349  if (ok && (nout > 1)) {
4350  for (size_t k = 1; k < nout; k++) {
4351  ok &= (remap[i + k] < i);
4352 
4353  ok &= (v2o[remap[i + k]] == v2o[remap[i]]);
4354 
4355  ok &= (remap[i + k] == remap[i] + k);
4356  }
4357  }
4358 
// Independent variables were handled above.
4359  if (CurOp == invop) {
4360  ok = false;
4361  }
// Constants must hold the same value.
4362  if (ok) {
4363  if (CurInfo.test(op_info::is_constant)) {
4364  if (glob.values[i] != glob.values[remap[i]]) {
4365  ok = false;
4366  }
4367  }
4368  }
4369 
// The dependencies of both operators must themselves remap to the same
// variables, position by position.
4370  if (ok) {
4371  glob.subgraph_cache_ptr();
4372 
4373  args.ptr = glob.subgraph_ptr[v2o[i]];
4374  dep1.resize(0);
4375  glob.opstack[v2o[i]]->dependencies(args, dep1);
4376 
4377  args.ptr = glob.subgraph_ptr[v2o[remap[i]]];
4378  dep2.resize(0);
4379  glob.opstack[v2o[remap[i]]]->dependencies(args, dep2);
4380 
4381  ok = (dep1.size() == dep2.size());
4382  if (ok) {
4383  bool all_equal = true;
4384  for (size_t j = 0; j < dep1.size(); j++) {
4385  all_equal &= (remap[dep1[j]] == remap[dep2[j]]);
4386  }
4387  ok = all_equal;
4388  }
4389  }
4390 
// Rejected: reset every output of this operator to the identity.
4391  if (!ok) {
4392  reject += nout;
4393  for (size_t k = i; k < i + nout; k++) remap[k] = k;
4394  }
4395  }
4396  }
4397 
// Invariants: a variable only maps to an earlier (or the same) variable, and
// the target maps to itself.
4398  for (size_t i = 0; i < remap.size(); i++) {
4399  TMBAD_ASSERT(remap[i] <= i);
4400  TMBAD_ASSERT(remap[remap[i]] == remap[i]);
4401  }
4402 
// Collect the dependency intervals (dep.I) of every operator that does not
// allow remapping and pass them to forbid_remap.
// NOTE(review): forbid_remap presumably resets 'remap' to the identity on
// those intervals - confirm.
4403  if (true) {
4404  Args<> args(glob.inputs);
4405  intervals<Index> visited;
4406  for (size_t i = 0; i < glob.opstack.size(); i++) {
4407  op_info info = glob.opstack[i]->info();
4408  if (!info.test(op_info::allow_remap)) {
4409  Dependencies dep;
4410  glob.opstack[i]->dependencies(args, dep);
4411  for (size_t j = 0; j < dep.I.size(); j++) {
4412  visited.insert(dep.I[j].first, dep.I[j].second);
4413  }
4414  }
4415  glob.opstack[i]->increment(args.ptr);
4416  }
4417 
4418  forbid_remap<std::vector<Index> > fb(remap);
4419  visited.apply(fb);
4420  }
// 'reject' and 'total' are statistics only; the cast silences an
// unused-variable warning.
4421  if (reject > 0) {
4422  ((void)(total));
4423  }
4424 
4425  return remap;
4426 }
4427 
4429  std::vector<Index> inv_remap(0);
4430  std::vector<Index> remap = remap_identical_sub_expressions(glob, inv_remap);
4431 
4432  for (size_t i = 0; i < glob.inputs.size(); i++) {
4433  glob.inputs[i] = remap[glob.inputs[i]];
4434  }
4435 }
4436 
4437 std::vector<Position> inv_positions(global &glob) {
4438  IndexPair ptr(0, 0);
4439  std::vector<bool> independent_variable = glob.inv_marks();
4440  std::vector<Position> ans(glob.inv_index.size());
4441  size_t k = 0;
4442  for (size_t i = 0; i < glob.opstack.size(); i++) {
4443  Index nout = glob.opstack[i]->output_size();
4444  for (Index j = 0; j < nout; j++) {
4445  if (independent_variable[ptr.second + j]) {
4446  ans[k].node = i;
4447  ans[k].ptr = ptr;
4448  k++;
4449  }
4450  }
4451  glob.opstack[i]->increment(ptr);
4452  }
4453  return ans;
4454 }
4455 
4456 void reorder_graph(global &glob, std::vector<Index> inv_idx) {
4457  if (!all_allow_remap(glob)) return;
4458  for (size_t i = 1; i < inv_idx.size(); i++) {
4459  TMBAD_ASSERT(inv_idx[i] > inv_idx[i - 1]);
4460  }
4461  std::vector<bool> marks(glob.values.size(), false);
4462  for (size_t i = 0; i < inv_idx.size(); i++)
4463  marks[glob.inv_index[inv_idx[i]]] = true;
4464  glob.forward_dense(marks);
4465  if (false) {
4466  int c = std::count(marks.begin(), marks.end(), true);
4467  Rcout << "marked proportion:" << (double)c / (double)marks.size() << "\n";
4468  }
4469 
4470  marks.flip();
4471  glob.set_subgraph(marks);
4472  marks.flip();
4473  glob.set_subgraph(marks, true);
4474  glob = glob.extract_sub();
4475 }
4476 } // namespace TMBad
4477 // Autogenerated - do not edit by hand !
4478 #include "integrate.hpp"
4479 namespace TMBad {
4480 
// Identity overload: the value of a plain double is the double itself.
double value(double x) {
  return x;
}
4482 
// Construct a `control` object by copying the three arguments into the
// members `subdivisions`, `reltol` and `abstol`.
4483 control::control(int subdivisions_, double reltol_, double abstol_)
4484  : subdivisions(subdivisions_), reltol(reltol_), abstol(abstol_) {}
4485 } // namespace TMBad
4486 // Autogenerated - do not edit by hand !
4487 #include "radix.hpp"
4488 namespace TMBad {}
4489 // Autogenerated - do not edit by hand !
4490 #include "tmbad_allow_comparison.hpp"
4491 namespace TMBad {
4492 
4493 bool operator<(const ad_aug &x, const ad_aug &y) {
4494  return x.Value() < y.Value();
4495 }
4496 bool operator<(const Scalar &x, const ad_aug &y) { return x < y.Value(); }
4497 
4498 bool operator<=(const ad_aug &x, const ad_aug &y) {
4499  return x.Value() <= y.Value();
4500 }
4501 bool operator<=(const Scalar &x, const ad_aug &y) { return x <= y.Value(); }
4502 
4503 bool operator>(const ad_aug &x, const ad_aug &y) {
4504  return x.Value() > y.Value();
4505 }
4506 bool operator>(const Scalar &x, const ad_aug &y) { return x > y.Value(); }
4507 
4508 bool operator>=(const ad_aug &x, const ad_aug &y) {
4509  return x.Value() >= y.Value();
4510 }
4511 bool operator>=(const Scalar &x, const ad_aug &y) { return x >= y.Value(); }
4512 
4513 bool operator==(const ad_aug &x, const ad_aug &y) {
4514  return x.Value() == y.Value();
4515 }
4516 bool operator==(const Scalar &x, const ad_aug &y) { return x == y.Value(); }
4517 
4518 bool operator!=(const ad_aug &x, const ad_aug &y) {
4519  return x.Value() != y.Value();
4520 }
4521 bool operator!=(const Scalar &x, const ad_aug &y) { return x != y.Value(); }
4522 } // namespace TMBad
4523 // Autogenerated - do not edit by hand !
4524 #include "vectorize.hpp"
4525 namespace TMBad {
4526 
// Construct a vector-sum operator; `n` is stored in the member of the same
// name and used as the segment length in `dependencies`.
4527 VSumOp::VSumOp(size_t n) : n(n) {}
4528 
// Report the inputs of this operator: one contiguous segment of `n`
// variables starting at `args.input(0)`.
4529 void VSumOp::dependencies(Args<> &args, Dependencies &dep) const {
4530  dep.add_segment(args.input(0), n);
4531 }
4532 
// The `Writer` (code generation) forward pass is not implemented and
// asserts if called.
4533 void VSumOp::forward(ForwardArgs<Writer> &args) { TMBAD_ASSERT(false); }
4534 
// The `Writer` (code generation) reverse pass is not implemented and
// asserts if called.
4535 void VSumOp::reverse(ReverseArgs<Writer> &args) { TMBAD_ASSERT(false); }
4536 
// Name of this operator.
4537 const char *VSumOp::op_name() { return "VSumOp"; }
4538 
4539 ad_aug sum(ad_segment x) {
4540  global::Complete<VSumOp> F(x.size());
4541  return F(x)[0];
4542 }
4543 
4544 Scalar *SegmentRef::value_ptr() { return (*glob_ptr).values.data() + offset; }
4545 
4546 Scalar *SegmentRef::deriv_ptr() { return (*glob_ptr).derivs.data() + offset; }
4547 
// Default constructor; performs no explicit initialization here.
// NOTE(review): whether `glob_ptr`, `offset` and `size` get default member
// initializers depends on the class declaration, which is not visible in
// this file -- confirm before relying on `isNull()` for such an object.
4548 SegmentRef::SegmentRef() {}
4549 
4550 SegmentRef::SegmentRef(const Scalar *x) {
4551  SegmentRef *sx = (SegmentRef *)x;
4552  *this = *sx;
4553 }
4554 
// Construct a reference to a segment: `g` is stored as `glob_ptr`, `o` as
// `offset` and `s` as `size`.
4555 SegmentRef::SegmentRef(global *g, Index o, Index s)
4556  : glob_ptr(g), offset(o), size(s) {}
4557 
4558 SegmentRef::SegmentRef(const ad_segment &x) {
4559  static const size_t K = ScalarPack<SegmentRef>::size;
4560  TMBAD_ASSERT(x.size() == K);
4561  Scalar buf[K];
4562  for (size_t i = 0; i < K; i++) buf[i] = x[i].Value();
4563  SegmentRef *sx = (SegmentRef *)buf;
4564  *this = *sx;
4565 }
4566 
4567 bool SegmentRef::isNull() { return (glob_ptr == NULL); }
4568 
4569 void SegmentRef::resize(ad_segment &pack, Index n) {
4570  Index i = pack.index();
4571  SegmentRef *p = (SegmentRef *)(get_glob()->values.data() + i);
4572  p->size = n;
4573 }
4574 
// Construct a pack operator; `n` is stored in the member of the same name
// and used as the input segment length in `dependencies`.
4575 PackOp::PackOp(const Index n) : n(n) {}
4576 
4578  SegmentRef *y = (SegmentRef *)args.y_ptr(0);
4579  y[0] = SegmentRef(args.glob_ptr, args.input(0), n);
4580 }
4581 
4583  ad_segment x(args.x_ptr(0), n);
4584  args.y_segment(0, K) = pack(x);
4585 }
4586 
4588  SegmentRef tmp(args.dy_ptr(0));
4589  if (tmp.glob_ptr != NULL) {
4590  Scalar *dx = SegmentRef(args.y_ptr(0)).deriv_ptr();
4591  Scalar *dy = SegmentRef(args.dy_ptr(0)).deriv_ptr();
4592  for (Index i = 0; i < n; i++) dx[i] += dy[i];
4593  }
4594 }
4595 
4597  ad_segment dy_packed(args.dy_ptr(0), K);
4598 
4599  if (SegmentRef(dy_packed).isNull()) {
4600  SegmentRef().resize(dy_packed, n);
4601  }
4602  ad_segment dy = unpack(dy_packed);
4603  ad_segment dx(args.dx_ptr(0), n, true);
4604  dx += dy;
4605  Replay *pdx = args.dx_ptr(0);
4606  for (Index i = 0; i < n; i++) pdx[i] = dx[i];
4607 }
4608 
// Name of this operator.
4609 const char *PackOp::op_name() { return "PackOp"; }
4610 
// Report the inputs of this operator: one contiguous segment of `n`
// variables starting at `args.input(0)`.
4611 void PackOp::dependencies(Args<> &args, Dependencies &dep) const {
4612  dep.add_segment(args.input(0), n);
4613 }
4614 
// Construct an unpack operator; `n` is stored in the member `noutput`.
4615 UnpkOp::UnpkOp(const Index n) : noutput(n) {}
4616 
4618  Scalar *y = args.y_ptr(0);
4619  SegmentRef srx(args.x_ptr(0));
4620  if (srx.isNull()) {
4621  for (Index i = 0; i < noutput; i++) y[i] = 0;
4622  return;
4623  }
4624  Scalar *x = srx.value_ptr();
4625  for (Index i = 0; i < noutput; i++) y[i] = x[i];
4626 
4627  ((SegmentRef *)args.x_ptr(0))->glob_ptr = NULL;
4628 }
4629 
4631  SegmentRef *dx = (SegmentRef *)args.dx_ptr(0);
4632  dx[0] = SegmentRef(args.glob_ptr, args.output(0), noutput);
4633 }
4634 
4636  ad_segment dy(args.dy_ptr(0), noutput);
4637  ad_segment dy_packed = pack(dy);
4638  Replay *pdx = args.dx_ptr(0);
4639  for (Index i = 0; i < dy_packed.size(); i++) pdx[i] = dy_packed[i];
4640 }
4641 
// Name of this operator.
4642 const char *UnpkOp::op_name() { return "UnpkOp"; }
4643 
// Report the inputs of this operator: one contiguous segment of `K`
// variables starting at `args.input(0)`.
// NOTE(review): `K` is declared outside this file; presumably the packed
// size `ScalarPack<SegmentRef>::size` -- confirm against the class.
4644 void UnpkOp::dependencies(Args<> &args, Dependencies &dep) const {
4645  dep.add_segment(args.input(0), K);
4646 }
4647 
4649  global::Complete<PackOp> F(x.size());
4650  return F(x);
4651 }
4652 
4654  Index n = SegmentRef(x).size;
4656  return op(x);
4657 }
4658 
4659 Scalar *unpack(const std::vector<Scalar> &x, Index j) {
4660  Index K = ScalarPack<SegmentRef>::size;
4661  SegmentRef sr(&(x[j * K]));
4662  return sr.value_ptr();
4663 }
4664 
4665 std::vector<ad_aug> concat(const std::vector<ad_segment> &x) {
4666  std::vector<ad_aug> ans;
4667  for (size_t i = 0; i < x.size(); i++) {
4668  ad_segment xi = x[i];
4669  for (size_t j = 0; j < xi.size(); j++) {
4670  ans.push_back(xi[j]);
4671  }
4672  }
4673  return ans;
4674 }
4675 } // namespace TMBad
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
std::vector< Index > op2var(const std::vector< Index > &seq)
Get variables produced by a node sequence.
Definition: TMBad.cpp:1435
std::vector< T > subset(const std::vector< T > &x, const std::vector< bool > &y)
Vector subset by boolean mask.
graph reverse_graph(std::vector< bool > keep_var=std::vector< bool >(0))
Construct operator graph with reverse connections.
Definition: TMBad.cpp:1584
diff --git a/TMBad_8hpp_source.html b/TMBad_8hpp_source.html index 563fb39c9..9209ea2b1 100644 --- a/TMBad_8hpp_source.html +++ b/TMBad_8hpp_source.html @@ -73,7 +73,7 @@
TMBad.hpp
-
1 #ifndef HAVE_TMBAD_HPP
2 #define HAVE_TMBAD_HPP
3 // Autogenerated - do not edit by hand !
4 #include "checkpoint.hpp"
5 #include "global.hpp"
6 #include "graph_transform.hpp"
7 
8 namespace TMBad {
9 
10 template <class ADFun>
11 struct Sparse;
12 template <class ADFun>
13 struct Decomp2;
14 template <class ADFun>
15 struct Decomp3;
16 
17 namespace {
18 
19 template <class I>
20 std::vector<I> cumsum0(const std::vector<bool> &x) {
21  std::vector<I> y(x.size(), 0);
22  for (size_t i = 1; i < x.size(); i++) {
23  y[i] = y[i - 1] + x[i - 1];
24  }
25  return y;
26 }
27 } // namespace
28 
56 template <class Functor, class InterfaceVector>
57 struct StdWrap {
58  Functor &F;
59  typedef typename InterfaceVector::value_type Scalar;
60  InterfaceVector tovec(const InterfaceVector &x) { return x; }
61  InterfaceVector tovec(const Scalar &x) {
62  InterfaceVector y(1);
63  y[0] = x;
64  return y;
65  }
66  StdWrap(Functor &F) : F(F) {}
67  template <class T>
68  std::vector<T> operator()(const std::vector<T> &x) {
69  InterfaceVector xi(x);
70  InterfaceVector yi = tovec(F(xi));
71  std::vector<T> y(yi);
72  return y;
73  }
74 };
75 
79  bool compress;
80  bool index_remap;
81 };
82 
116 template <class ad = ad_aug>
117 struct ADFun {
118  global glob;
119 
121  template <class Functor, class ScalarVector>
122  ADFun(Functor F, const ScalarVector &x_) : force_update_flag(false) {
123  std::vector<ad> x(x_.size());
124  for (size_t i = 0; i < x.size(); i++) x[i] = Value(x_[i]);
125  global *glob_begin = get_glob();
126  this->glob.ad_start();
127  Independent(x);
128  std::vector<ad> y = F(x);
129  Dependent(y);
130  this->glob.ad_stop();
131  global *glob_end = get_glob();
132  TMBAD_ASSERT(glob_begin == glob_end);
133  }
134 
138  template <class Functor>
139  ADFun(Functor F, Scalar x0_) : force_update_flag(false) {
140  global *glob_begin = get_glob();
141  this->glob.ad_start();
142  ad x0(x0_);
143  x0.Independent();
144  ad y0 = F(x0);
145  y0.Dependent();
146  this->glob.ad_stop();
147  global *glob_end = get_glob();
148  TMBAD_ASSERT(glob_begin == glob_end);
149  }
150 
154  template <class Functor>
155  ADFun(Functor F, Scalar x0_, Scalar x1_) : force_update_flag(false) {
156  global *glob_begin = get_glob();
157  this->glob.ad_start();
158  ad x0(x0_);
159  x0.Independent();
160  ad x1(x1_);
161  x1.Independent();
162  ad y0 = F(x0, x1);
163  y0.Dependent();
164  this->glob.ad_stop();
165  global *glob_end = get_glob();
166  TMBAD_ASSERT(glob_begin == glob_end);
167  }
168 
// Default constructor: nothing is recorded; only `force_update_flag` is
// explicitly initialized (to false).
169  ADFun() : force_update_flag(false) {}
170 
// Thin forwarders to the underlying tape `glob`.
// Run `glob.forward()`.
171  void forward() { glob.forward(); }
// Run `glob.reverse()`.
172  void reverse() { glob.reverse(); }
// Run `glob.clear_deriv()`.
173  void clear_deriv() { glob.clear_deriv(); }
// Reference to the derivative slot of the i'th independent variable.
174  Scalar &deriv_inv(Index i) { return glob.deriv_inv(i); }
// Reference to the derivative slot of the i'th dependent variable.
175  Scalar &deriv_dep(Index i) { return glob.deriv_dep(i); }
176 
// Print the tape using `glob.print(cfg)`; `cfg` defaults to a
// default-constructed `print_config`.
178  void print(print_config cfg = print_config()) { glob.print(cfg); }
179 
// Forward to `glob.eliminate()`.
181  void eliminate() { glob.eliminate(); }
182 
195  void optimize() {
196  TMBAD_ASSERT2(inv_pos.size() == 0,
197  "Tape has 'cached independent variable positions' which "
198  "would be invalidated by the optimizer");
199 
200  std::vector<bool> outer_mask;
201  if (inner_outer_in_use()) {
202  outer_mask = DomainOuterMask();
203  }
204 
206 
207  glob.eliminate();
208 
209  if (inner_outer_in_use()) {
210  TMBAD_ASSERT(outer_mask.size() == Domain());
211  set_inner_outer(*this, outer_mask);
212  }
213  }
223  std::vector<Position> pos = inv_positions(glob);
224  inv_pos = subset(pos, invperm(order(glob.inv_index)));
225  }
237  void reorder(std::vector<Index> last) {
238  std::vector<bool> outer_mask;
239  if (inner_outer_in_use()) {
240  outer_mask = DomainOuterMask();
241  }
242  reorder_graph(glob, last);
243 
244  if (inner_outer_in_use()) {
245  TMBAD_ASSERT(outer_mask.size() == Domain());
246  set_inner_outer(*this, outer_mask);
247  }
248  set_inv_positions();
249  }
250 
// Number of independent variables (length of `glob.inv_index`).
251  size_t Domain() const { return glob.inv_index.size(); }
// Number of dependent variables (length of `glob.dep_index`).
252  size_t Range() const { return glob.dep_index.size(); }
254  std::vector<bool> activeDomain() {
255  std::vector<bool> mark(glob.values.size(), false);
256  for (size_t i = 0; i < glob.dep_index.size(); i++)
257  mark[glob.dep_index[i]] = true;
258  glob.reverse(mark);
259  return subset(mark, glob.inv_index);
260  }
262  std::vector<bool> activeRange() {
263  std::vector<bool> mark(glob.values.size(), false);
264  for (size_t i = 0; i < glob.inv_index.size(); i++)
265  mark[glob.inv_index[i]] = true;
266  glob.forward(mark);
267  return subset(mark, glob.dep_index);
268  }
270  std::vector<Scalar> DomainVec() {
271  std::vector<Scalar> xd(Domain());
272  for (size_t i = 0; i < xd.size(); i++) xd[i] = glob.value_inv(i);
273  return xd;
274  }
277  return IndirectAccessor<Scalar>(glob.values, glob.dep_index);
278  }
280  std::vector<bool> get_keep_var(std::vector<bool> keep_x,
281  std::vector<bool> keep_y) {
282  std::vector<bool> keep_var(glob.values.size(), true);
283  if (keep_x.size() > 0 || keep_y.size() > 0) {
284  if (keep_x.size() == 0) keep_x.resize(glob.inv_index.size(), true);
285  if (keep_y.size() == 0) keep_y.resize(glob.dep_index.size(), true);
286  TMBAD_ASSERT(keep_x.size() == glob.inv_index.size());
287  TMBAD_ASSERT(keep_y.size() == glob.dep_index.size());
288 
289  std::vector<bool> keep_var_init(keep_var.size(), false);
290  for (size_t i = 0; i < glob.inv_index.size(); i++)
291  if (keep_x[i]) keep_var_init[glob.inv_index[i]] = true;
292  for (size_t i = 0; i < glob.dep_index.size(); i++)
293  if (keep_y[i]) keep_var_init[glob.dep_index[i]] = true;
294 
295  std::vector<bool> keep_var_x = keep_var_init;
296  glob.forward(keep_var_x);
297 
298  std::vector<bool> keep_var_y = keep_var_init;
299  glob.reverse(keep_var_y);
300 
301  for (size_t i = 0; i < keep_var.size(); i++)
302  keep_var[i] = keep_var_x[i] && keep_var_y[i];
303  }
304  return keep_var;
305  }
313  std::vector<Position> inv_pos;
315  Position find_pos(Index inv) {
316  for (size_t i = 0; i < inv_pos.size(); i++) {
317  if (inv_pos[i].ptr.second == inv) return inv_pos[i];
318  }
319  return Position(0, 0, 0);
320  }
325  Position tail_start;
331  if (glob.inv_index.size() == 0) return true;
332 
333  bool is_sorted = (inv_pos.size() == 0 && !inner_outer_in_use());
334  return is_sorted && (glob.inv_index.size() ==
335  1 + glob.inv_index.back() - glob.inv_index.front());
336  }
339  void set_tail(const std::vector<Index> &random) {
340  if (inv_pos.size() > 0) {
341  std::vector<Position> pos = subset(inv_pos, random);
342  tail_start = *std::min_element(pos.begin(), pos.end());
343  } else {
344  tail_start = Position(0, 0, 0);
345  }
346  }
// Reset `tail_start` to `Position(0, 0, 0)`.
349  void unset_tail() { tail_start = Position(0, 0, 0); }
// Request that the next `DomainVecSet` call writes all inputs and returns
// `Position(0, 0, 0)` regardless of which values changed.
351  void force_update() { force_update_flag = true; }
// Set by `force_update()`; cleared again by `DomainVecSet`.
352  bool force_update_flag;
// Write new input values to the tape and report from where a forward sweep
// has to restart.
//
// x  New values of the independent variables; must have `Domain()` elements.
//
// Returns
//  - `Position(0, 0, 0)` when a forced update was requested, or when no
//    positions are cached and at least one value changed (or `x` is empty);
//  - `glob.end()` when no value differs from what is already stored;
//  - with cached positions: the cached position of the changed input with
//    the smallest variable index, or, if an inner/outer split is in use, the
//    position found for the smallest entry of `glob.inv_index`.
354  template <class InplaceVector>
355  Position DomainVecSet(const InplaceVector &x) {
356  TMBAD_ASSERT(x.size() == Domain());
// Forced update: overwrite everything and consume the flag.
357  if (force_update_flag) {
358  for (size_t i = 0; i < x.size(); i++) glob.value_inv(i) = x[i];
359  force_update_flag = false;
360  return Position(0, 0, 0);
361  }
362  if (inv_pos.size() > 0) {
363  if (inner_outer_in_use()) {
364  for (size_t i = 0; i < x.size(); i++) glob.value_inv(i) = x[i];
365  Index min_inv =
366  *std::min_element(glob.inv_index.begin(), glob.inv_index.end());
367  return find_pos(min_inv);
368  }
369  TMBAD_ASSERT(inv_pos.size() == Domain());
// (size_t)-1 serves as the "nothing changed yet" sentinel.
370  size_t min_var_changed = -1;
371  size_t i_min = -1;
372  for (size_t i = 0; i < x.size(); i++) {
// Compare against the stored value BEFORE it is overwritten below.
373  if (glob.value_inv(i) != x[i] && glob.inv_index[i] < min_var_changed) {
374  min_var_changed = glob.inv_index[i];
375  i_min = i;
376  }
377  glob.value_inv(i) = x[i];
378  }
379  if (min_var_changed == (size_t)-1)
380  return glob.end();
381  else
382  return inv_pos[i_min];
383  }
// No cached positions: only distinguish "nothing changed" from "something
// changed".
384  if (x.size() > 0) {
385  bool no_change = true;
386  for (size_t i = 0; i < x.size(); i++) {
387  if (glob.value_inv(i) != x[i]) {
388  no_change = false;
389  break;
390  }
391  }
392  if (no_change) return glob.end();
393 
394  for (size_t i = 0; i < x.size(); i++) glob.value_inv(i) = x[i];
395  }
396  return Position(0, 0, 0);
397  }
399  template <class Vector>
400  Vector forward(const Vector &x) {
401  TMBAD_ASSERT((size_t)x.size() == Domain());
402  for (size_t i = 0; i < (size_t)x.size(); i++) glob.value_inv(i) = x[i];
403  glob.forward();
404  Vector y(Range());
405  for (size_t i = 0; i < (size_t)y.size(); i++) y[i] = glob.value_dep(i);
406  return y;
407  }
409  template <class Vector>
410  Vector reverse(const Vector &w) {
411  TMBAD_ASSERT((size_t)w.size() == Range());
412  glob.clear_deriv();
413  for (size_t i = 0; i < (size_t)w.size(); i++) glob.deriv_dep(i) = w[i];
414  glob.reverse();
415  Vector d(Domain());
416  for (size_t i = 0; i < (size_t)d.size(); i++) d[i] = glob.deriv_inv(i);
417  return d;
418  }
420  std::vector<Scalar> operator()(const std::vector<Scalar> &x) {
421  Position start = DomainVecSet(x);
422  glob.forward(start);
423  return RangeVec();
424  }
425 
426  IndirectAccessor<Scalar> operator()(
427  const segment_ref<ForwardArgs<Scalar>, x_read> &x) {
428  Position start = DomainVecSet(x);
429  glob.forward(start);
430  return RangeVec();
431  }
437  std::vector<ad> operator()(const std::vector<ad> &x_) const {
438  std::vector<ad> x(x_.begin(), x_.end());
439  TMBAD_ASSERT(x.size() == Domain());
440  for (size_t i = 0; i < x.size(); i++) {
441  x[i].addToTape();
442  }
443  global *cur_glob = get_glob();
444  for (size_t i = 0; i < x.size(); i++) {
445  TMBAD_ASSERT(x[i].on_some_tape());
446  TMBAD_ASSERT(x[i].glob() == cur_glob);
447  }
448  global::replay replay(this->glob, *get_glob());
449  replay.start();
450  for (size_t i = 0; i < this->Domain(); i++) {
451  replay.value_inv(i) = x[i];
452  }
453  replay.forward(false, false);
454  std::vector<ad> y(this->Range());
455  for (size_t i = 0; i < this->Range(); i++) {
456  y[i] = replay.value_dep(i);
457  }
458  replay.stop();
459  return y;
460  }
463  ad operator()(ad x0) {
464  TMBAD_ASSERT(Domain() == 1);
465  TMBAD_ASSERT(Range() == 1);
466  std::vector<ad> x(1);
467  x[0] = x0;
468  return (*this)(x)[0];
469  }
472  ad operator()(ad x0, ad x1) {
473  TMBAD_ASSERT(Domain() == 2);
474  TMBAD_ASSERT(Range() == 1);
475  std::vector<ad> x(2);
476  x[0] = x0;
477  x[1] = x1;
478  return (*this)(x)[0];
479  }
484  std::vector<Scalar> Jacobian(const std::vector<Scalar> &x) {
485  Position start = DomainVecSet(x);
486  glob.forward(start);
487  std::vector<Scalar> ans(Domain() * Range());
488  for (size_t j = 0; j < Range(); j++) {
489  glob.clear_deriv(tail_start);
490  glob.deriv_dep(j) = 1;
491  glob.reverse(tail_start);
492  for (size_t k = 0; k < Domain(); k++)
493  ans[j * Domain() + k] = glob.deriv_inv(k);
494  }
495  return ans;
496  }
503  std::vector<Scalar> Jacobian(const std::vector<Scalar> &x,
504  std::vector<bool> keep_x,
505  std::vector<bool> keep_y) {
506  std::vector<Scalar> ans;
507 
508  std::vector<bool> keep_var = get_keep_var(keep_x, keep_y);
509 
510  graph G = this->glob.reverse_graph(keep_var);
511 
512  std::vector<size_t> which_keep_x = which(keep_x);
513  std::vector<size_t> which_keep_y = which(keep_y);
514 
515  Position start = DomainVecSet(x);
516  glob.forward(start);
517 
518  for (size_t w = 0; w < which_keep_y.size(); w++) {
519  size_t k = which_keep_y[w];
520 
521  glob.subgraph_seq.resize(0);
522  glob.subgraph_seq.push_back(G.dep2op[k]);
523  G.search(glob.subgraph_seq);
524 
525  glob.clear_deriv_sub();
526  for (size_t l = 0; l < which_keep_x.size(); l++)
527  glob.deriv_inv(which_keep_x[l]) = Scalar(0);
528  glob.deriv_dep(k) = 1.;
529  glob.reverse_sub();
530 
531  for (size_t l = 0; l < which_keep_x.size(); l++) {
532  ans.push_back(glob.deriv_inv(which_keep_x[l]));
533  }
534  }
535  return ans;
536  }
542  std::vector<Scalar> Jacobian(const std::vector<Scalar> &x,
543  const std::vector<Scalar> &w) {
544  TMBAD_ASSERT(x.size() == Domain());
545  TMBAD_ASSERT(w.size() == Range());
546  Position start = DomainVecSet(x);
547  glob.forward(start);
548  glob.clear_deriv();
549  for (size_t j = 0; j < Range(); j++) glob.deriv_dep(j) = w[j];
550  glob.reverse();
551  return IndirectAccessor<Scalar>(glob.derivs, glob.inv_index);
552  }
553 
554  IndirectAccessor<Scalar> Jacobian(
555  const segment_ref<ReverseArgs<Scalar>, x_read> &x,
556  const segment_ref<ReverseArgs<Scalar>, dy_read> &w) {
557  TMBAD_ASSERT(x.size() == Domain());
558  TMBAD_ASSERT(w.size() == Range());
559  Position start = DomainVecSet(x);
560  glob.forward(start);
561  glob.clear_deriv();
562  for (size_t j = 0; j < Range(); j++) glob.deriv_dep(j) = w[j];
563  glob.reverse();
564  return IndirectAccessor<Scalar>(glob.derivs, glob.inv_index);
565  }
566  std::vector<ad> Jacobian(const std::vector<ad> &x_,
567  const std::vector<ad> &w_) {
568  std::vector<ad> x(x_.begin(), x_.end());
569  std::vector<ad> w(w_.begin(), w_.end());
570  global *cur_glob = get_glob();
571 
572  TMBAD_ASSERT(x.size() == Domain());
573  for (size_t i = 0; i < x.size(); i++) {
574  x[i].addToTape();
575  }
576  for (size_t i = 0; i < x.size(); i++) {
577  TMBAD_ASSERT(x[i].on_some_tape());
578  TMBAD_ASSERT(x[i].glob() == cur_glob);
579  }
580 
581  TMBAD_ASSERT(w.size() == Range());
582  for (size_t i = 0; i < w.size(); i++) {
583  w[i].addToTape();
584  }
585  for (size_t i = 0; i < w.size(); i++) {
586  TMBAD_ASSERT(w[i].on_some_tape());
587  TMBAD_ASSERT(w[i].glob() == cur_glob);
588  }
589 
590  global::replay replay(this->glob, *get_glob());
591  replay.start();
592  for (size_t i = 0; i < this->Domain(); i++) {
593  replay.value_inv(i) = x[i];
594  }
595  replay.forward(false, false);
596  replay.clear_deriv();
597  for (size_t i = 0; i < this->Range(); i++) {
598  replay.deriv_dep(i) = w[i];
599  }
600  replay.reverse(false, false);
601  std::vector<ad> dx(this->Domain());
602  for (size_t i = 0; i < dx.size(); i++) {
603  dx[i] = replay.deriv_inv(i);
604  }
605  replay.stop();
606  return dx;
607  }
// Build a new ADFun that evaluates the (dense) Jacobian of this function.
//
// range_weight  false: one reverse replay per kept output, seeded with 1.
//               true:  a single reverse replay (see `WgtJacFun`).
// keep_x        Mask over inputs; empty means "all". Only kept inputs are
//               declared dependent in the result.
// keep_y        Mask over outputs; empty means "all".
//
// The computation is replayed from `this->glob` onto `ans.glob`. Without
// range weights, subgraph reverse sweeps are used when `Range() > 1`
// (the reverse graph `G` is only built in that case); otherwise full reverse
// sweeps restricted by `tail_start` and the operator mask `keep`.
608  template <bool range_weight>
609  ADFun JacFun_(std::vector<bool> keep_x, std::vector<bool> keep_y) {
610  ADFun ans;
611  if (keep_x.size() == 0) keep_x.resize(Domain(), true);
612  if (keep_y.size() == 0) keep_y.resize(Range(), true);
613  std::vector<bool> keep = get_keep_var(keep_x, keep_y);
614  graph G;
615  if (!range_weight && Range() > 1) {
616  G = this->glob.reverse_graph(keep);
617  }
// From here on `keep` is a mask over operators rather than variables.
618  keep = glob.var2op(keep);
619  global::replay replay(this->glob, ans.glob);
620  replay.start();
621  replay.forward(true, false);
622  if (!range_weight) {
623  if (G.empty()) {
// No graph available: full reverse sweep per kept output.
624  for (size_t i = 0; i < this->Range(); i++) {
625  if (!keep_y[i]) continue;
626  replay.clear_deriv();
627  replay.deriv_dep(i) = 1.;
628  replay.reverse(false, false, tail_start, keep);
629  for (size_t j = 0; j < this->Domain(); j++) {
630  if (keep_x[j]) replay.deriv_inv(j).Dependent();
631  }
632  }
633  } else {
// Graph available: reverse sweep restricted to the subgraph of each output.
634  replay.clear_deriv();
635  for (size_t i = 0; i < this->Range(); i++) {
636  if (!keep_y[i]) continue;
637  glob.subgraph_seq.resize(0);
638  glob.subgraph_seq.push_back(G.dep2op[i]);
639  G.search(glob.subgraph_seq);
640  replay.deriv_dep(i) = 1.;
641  replay.reverse_sub();
642  for (size_t j = 0; j < this->Domain(); j++) {
643  if (keep_x[j]) replay.deriv_inv(j).Dependent();
644  }
645  replay.clear_deriv_sub();
646  }
647  }
648  } else {
649  replay.clear_deriv();
650  replay.reverse(false, true, tail_start, keep);
651  for (size_t j = 0; j < this->Domain(); j++) {
652  if (keep_x[j]) replay.deriv_inv(j).Dependent();
653  }
654  }
655  replay.stop();
656  set_inner_outer(ans);
657  return ans;
658  }
// Jacobian as a new ADFun, without range weights (`JacFun_<false>`).
// Empty masks (the defaults) select all inputs / outputs.
680  ADFun JacFun(std::vector<bool> keep_x = std::vector<bool>(0),
681  std::vector<bool> keep_y = std::vector<bool>(0)) {
682  return JacFun_<false>(keep_x, keep_y);
683  }
// Range-weighted Jacobian as a new ADFun (`JacFun_<true>`).
// Empty masks (the defaults) select all inputs / outputs.
702  ADFun WgtJacFun(std::vector<bool> keep_x = std::vector<bool>(0),
703  std::vector<bool> keep_y = std::vector<bool>(0)) {
704  return JacFun_<true>(keep_x, keep_y);
705  }
709  std::vector<Scalar> x = DomainVec();
710  return ADFun(F, x);
711  }
717  std::vector<ADFun> parallel_accumulate(size_t num_threads) {
718  TMBAD_ASSERT(Range() == 1);
719  global glob_split = accumulation_tree_split(glob);
720  autopar ap(glob_split, num_threads);
721  ap.do_aggregate = true;
722  ap.keep_all_inv = true;
723  ap.run();
724  ap.extract();
725  std::vector<ADFun> ans(num_threads);
726  for (size_t i = 0; i < num_threads; i++) ans[i].glob = ap.vglob[i];
727  return ans;
728  }
732  ADFun parallelize(size_t num_threads) {
733  TMBAD_ASSERT(Range() == 1);
734  global glob_split = accumulation_tree_split(glob);
735  autopar ap(glob_split, num_threads);
736  ap.do_aggregate = true;
737  ap.keep_all_inv = false;
738  ap.run();
739  ap.extract();
740  global::Complete<ParalOp> f_parallel(ap);
741  ADFun F(f_parallel, DomainVec());
742  aggregate(F.glob);
743  return F;
744  }
// Replay the tape onto itself via `glob.forward_replay(true, true)`.
750  void replay() { glob.forward_replay(true, true); }
// Build a new function object that evaluates the sparse Jacobian.
//
// keep_x  Mask over inputs (columns); empty means "all".
// keep_y  Mask over outputs (rows); empty means "all".
// config  `config.compress` enables the experimental row compression;
//         `config.index_remap` renumbers rows/columns to the kept subsets.
//
// For every dependent variable k the operator subgraph reachable from k is
// found with `G.search`. A boolean reverse sweep over that subgraph decides
// which independent variables k depends on; these become the column indices
// of row k (appended to `ans.i` / `ans.j`). The derivative computation is
// replayed onto `ans.glob` and the selected input derivatives are declared
// as its outputs.
776  Sparse<ADFun> SpJacFun(std::vector<bool> keep_x = std::vector<bool>(0),
777  std::vector<bool> keep_y = std::vector<bool>(0),
778  SpJacFun_config config = SpJacFun_config()) {
779  ADFun atomic_jac_row;
780  std::vector<Index> rowcounts;
781 
782  Sparse<ADFun> ans;
783 
784  ans.m = Range();
785  ans.n = Domain();
786 
787  if (keep_x.size() == 0) keep_x.resize(Domain(), true);
788  if (keep_y.size() == 0) keep_y.resize(Range(), true);
789  std::vector<bool> keep_var = get_keep_var(keep_x, keep_y);
790 
791  size_t keep_x_count = std::count(keep_x.begin(), keep_x.end(), true);
792  size_t keep_y_count = std::count(keep_y.begin(), keep_y.end(), true);
793 
794  graph G = this->glob.reverse_graph(keep_var);
795 
796  global::replay replay(this->glob, ans.glob);
797  replay.start();
798  replay.forward(true, false);
799 
// `NA` marks operators that are not independent-variable operators.
800  Index NA = -1;
801  std::vector<Index> op2inv_idx = glob.op2idx(glob.inv_index, NA);
802 
// From here on `keep_var` is reused as the work array of the boolean
// reverse sweeps below.
803  std::fill(keep_var.begin(), keep_var.end(), true);
804 
805  std::vector<Index> col_idx;
806  for (size_t k = 0; k < glob.dep_index.size(); k++) {
807  size_t i = glob.dep_index[k];
808 
809  glob.subgraph_seq.resize(0);
810  glob.subgraph_seq.push_back(G.dep2op[k]);
811  G.search(glob.subgraph_seq);
812 
// Compress this row only if the estimated cost of the atomic row function
// (cost2) is below the summed row counts of the subgraph (cost1).
813  bool do_compress = false;
814  if (config.compress) {
815  if (rowcounts.size() == 0) rowcounts = G.rowcounts();
816 
817  size_t cost1 = 0;
// Note: this loop index shadows the outer `i` declared above.
818  for (size_t i = 0; i < glob.subgraph_seq.size(); i++) {
819  cost1 += rowcounts[glob.subgraph_seq[i]];
820  }
821 
822  size_t cost2 = Domain() + Range() + Domain();
823 
824  if (cost2 < cost1) do_compress = true;
825  }
826 
// Boolean reverse sweep: clear the subgraph entries, mark output i and
// propagate the mark backwards.
827  if (true) {
828  glob.clear_array_subgraph(keep_var);
829  keep_var[i] = true;
830  glob.reverse_sub(keep_var);
831  }
832 
// Collect the independent variables of the subgraph that ended up marked.
833  col_idx.resize(0);
834  for (size_t l = 0; l < glob.subgraph_seq.size(); l++) {
835  Index idx = op2inv_idx[glob.subgraph_seq[l]];
836  if (idx != NA) {
837  Index nrep = glob.opstack[glob.subgraph_seq[l]]->output_size();
838  for (Index r = 0; r < nrep; r++) {
839  if (keep_var[glob.inv_index[idx]]) col_idx.push_back(idx);
840  idx++;
841  }
842  }
843  }
844 
// Append the (row, column) pairs of row k.
845  ans.i.resize(ans.i.size() + col_idx.size(), k);
846  ans.j.insert(ans.j.end(), col_idx.begin(), col_idx.end());
847  if (!do_compress) {
848  replay.clear_deriv_sub();
849 
850  replay.deriv_dep(k) = 1.;
851 
852  replay.reverse_sub();
853 
854  } else {
// The atomic row function is built lazily, the first time a row is
// compressed.
855  if (atomic_jac_row.Domain() == 0) {
856  Rcout << "Warning: This is an experimental compression method\n";
857  Rcout << "Disable: 'config(tmbad.sparse_hessian_compress=0)'\n";
858  atomic_jac_row = this->WgtJacFun(keep_x, keep_y);
859  atomic_jac_row.optimize();
860 
861  atomic_jac_row.set_inv_positions();
862 
863  atomic_jac_row = atomic_jac_row.atomic();
864 
865  replay.clear_deriv_sub();
866  Rcout << "done\n";
867 
868  TMBAD_ASSERT(atomic_jac_row.Domain() ==
869  this->Domain() + this->Range());
870  TMBAD_ASSERT(atomic_jac_row.Range() == keep_x_count);
871  }
// Argument vector: the inputs followed by a unit weight on output k.
872  std::vector<Replay> vec(atomic_jac_row.Domain(), Replay(0));
873  for (size_t i = 0; i < this->Domain(); i++) {
874  vec[i] = replay.value_inv(i);
875  }
876  vec[this->Domain() + k] = 1.;
877  std::vector<Replay> r = atomic_jac_row(vec);
878  size_t r_idx = 0;
879  for (size_t i = 0; i < this->Domain(); i++) {
880  if (keep_x[i]) replay.deriv_inv(i) = r[r_idx++];
881  }
882  }
883  for (size_t l = 0; l < col_idx.size(); l++) {
884  replay.deriv_inv(col_idx[l]).Dependent();
885  }
886  }
887  replay.stop();
// Optionally renumber rows/columns so they index the kept subsets only.
888  if (config.index_remap) {
889  if (keep_x.size() > 0) {
890  std::vector<Index> remap_j = cumsum0<Index>(keep_x);
891  ans.j = TMBad::subset(remap_j, ans.j);
892  ans.n = keep_x_count;
893  }
894  if (keep_y.size() > 0) {
895  std::vector<Index> remap_i = cumsum0<Index>(keep_y);
896  ans.i = TMBad::subset(remap_i, ans.i);
897  ans.m = keep_y_count;
898  }
899  }
900  set_inner_outer(ans);
901  return ans;
902  }
907  ADFun marginal_gk(const std::vector<Index> &random,
908  gk_config cfg = gk_config()) {
909  ADFun ans;
910  old_state os(this->glob);
911  aggregate(this->glob, -1);
912  global glob_split = accumulation_tree_split(this->glob);
913  os.restore();
914  integrate_subgraph<ADFun> i_s(glob_split, random, cfg);
915  ans.glob = i_s.gk();
916  aggregate(ans.glob, -1);
917  return ans;
918  }
920  ADFun marginal_sr(const std::vector<Index> &random, std::vector<sr_grid> grid,
921  const std::vector<Index> &random2grid, bool perm = true) {
922  ADFun ans;
923  old_state os(this->glob);
924  aggregate(this->glob, -1);
925  global glob_split = accumulation_tree_split(this->glob);
926  os.restore();
927  sequential_reduction SR(glob_split, random, grid, random2grid, perm);
928  ans.glob = SR.marginal();
929  aggregate(ans.glob, -1);
930  return ans;
931  }
933  ADFun marginal_sr(const std::vector<Index> &random,
934  sr_grid grid = sr_grid()) {
935  return marginal_sr(random, std::vector<sr_grid>(1, grid),
936  std::vector<Index>(0));
937  }
942  ADFun compose(ADFun other) {
943  TMBAD_ASSERT2(other.Range() == this->Domain(),
944  "Compostion of incompatible functions");
945  struct composition {
946  const ADFun &f;
947  const ADFun &g;
948  composition(const ADFun &f, const ADFun &g) : f(f), g(g) {}
949  std::vector<ad> operator()(std::vector<ad> x) { return f(g(x)); }
950  };
951  composition fg(*this, other);
952  return ADFun(fg, other.DomainVec());
953  }
958  Decomp2<ADFun> decompose(std::vector<Index> nodes) {
959  Decomp2<ADFun> ans;
960  global &glob1 = ans.first.glob;
961  global &glob2 = ans.second.glob;
962 
963  OperatorPure *invop = glob.getOperator<global::InvOp>();
964  std::vector<bool> keep(nodes.size(), true);
965  for (size_t i = 0; i < nodes.size(); i++)
966  if (glob.opstack[nodes[i]] == invop) keep[i] = false;
967  nodes = subset(nodes, keep);
968 
969  glob1 = this->glob;
970  glob1.dep_index.resize(0);
971  std::vector<Index> dep1 = glob1.op2var(nodes);
972  glob1.ad_start();
973  for (size_t i = 0; i < dep1.size(); i++) {
974  ad_plain tmp;
975  tmp.index = dep1[i];
976  tmp.Dependent();
977  }
978  glob1.ad_stop();
979  glob1.eliminate();
980 
981  glob2 = this->glob;
982  substitute(glob2, nodes);
983  glob2.eliminate();
984 
985  set_inner_outer(ans.first);
986  set_inner_outer(ans.second);
987 
988  return ans;
989  }
994  Decomp2<ADFun> decompose(const char *name) {
995  std::vector<Index> nodes = find_op_by_name(this->glob, name);
996  return decompose(nodes);
997  }
1003  if (find_op_by_name(glob, "RefOp").size() == 0) return;
1004 
1005  std::vector<bool> keep_x(Domain(), true);
1006  std::vector<bool> keep_y(Range(), true);
1007  std::vector<bool> vars = get_keep_var(keep_x, keep_y);
1008 
1009  vars = reverse_boundary(glob, vars);
1010 
1011  std::vector<Index> nodes = which<Index>(glob.var2op(vars));
1012 
1013  Decomp2<ADFun> decomp = decompose(nodes);
1014 
1015  size_t n_inner = decomp.first.Domain();
1016  size_t n_outer = decomp.first.Range();
1017 
1018  decomp.first.glob.inv_index.resize(0);
1019 
1020  std::vector<ad_aug> empty;
1021  std::vector<ad_aug> gx = decomp.first(empty);
1022 
1023  ADFun &f = decomp.second;
1024 
1025  f.replay();
1026 
1027  TMBAD_ASSERT(n_inner + n_outer == f.Domain());
1028  TMBAD_ASSERT(find_op_by_name(f.glob, "RefOp").size() == 0);
1029  TMBAD_ASSERT(find_op_by_name(f.glob, "InvOp").size() == f.Domain());
1030  TMBAD_ASSERT(gx.size() == n_outer);
1031 
1032  for (size_t i = 0; i < n_outer; i++) {
1033  Index j = f.glob.inv_index[n_inner + i];
1034 
1035  if (gx[i].constant()) {
1036  f.glob.opstack[j] = glob.getOperator<global::ConstOp>();
1037  } else {
1038  f.glob.opstack[j] = glob.getOperator<global::RefOp>(
1039  gx[i].data.glob, gx[i].taped_value.index);
1040  }
1041  }
1042  f.glob.inv_index.resize(n_inner);
1043 
1044  *this = f;
1045  }
/** \brief Turn every `RefOp` of this tape into an independent variable.

    Each operator named "RefOp" is evaluated once into `values` (via
    `forward_incr`), deallocated and replaced by `InvOp`. Afterwards:
    - `inner_inv_index` holds the previous `glob.inv_index`,
    - `outer_inv_index` holds the variables of the former `RefOp`s,
    - `glob.inv_index` is the inner indices followed by the outer ones.

    May only be called once per object (asserted).

    \return One value per replaced `RefOp`, in the order they were found.
*/
1055  std::vector<ad_aug> resolve_refs() {
1056  TMBAD_ASSERT2(
1057  inner_inv_index.size() == 0 && outer_inv_index.size() == 0,
1058  "'resolve_refs' can only be run once for a given function object")
1059 
1060  ;
1061  std::vector<Index> seq = find_op_by_name(glob, "RefOp");
1062  std::vector<Replay> values(seq.size());
1063  std::vector<Index> dummy_inputs;
1064  ForwardArgs<Replay> args(dummy_inputs, values);
1065  for (size_t i = 0; i < seq.size(); i++) {
      // A RefOp is expected to have no inputs and exactly one output.
1066  TMBAD_ASSERT(glob.opstack[seq[i]]->input_size() == 0);
1067  TMBAD_ASSERT(glob.opstack[seq[i]]->output_size() == 1);
1068  glob.opstack[seq[i]]->forward_incr(args);
1069  glob.opstack[seq[i]]->deallocate();
1070  glob.opstack[seq[i]] = get_glob()->getOperator<global::InvOp>();
1071  }
1072  inner_inv_index = glob.inv_index;
1073  outer_inv_index = glob.op2var(seq);
1074 
      // Outer parameters are appended after the inner ones.
1075  glob.inv_index.insert(glob.inv_index.end(), outer_inv_index.begin(),
1076  outer_inv_index.end());
1077  return values;
1078  }
      // Independent-variable indices of the inner / outer parameters.
      // Both are empty unless an inner/outer split has been set up
      // (e.g. by 'resolve_refs' or 'set_inner_outer').
1079  std::vector<Index> inner_inv_index;
1080  std::vector<Index> outer_inv_index;
/** \brief Number of inner parameters (`inner_inv_index.size()`). */
1082  size_t DomainInner() const { return inner_inv_index.size(); }
/** \brief Number of outer parameters (`outer_inv_index.size()`). */
1084  size_t DomainOuter() const { return outer_inv_index.size(); }
/** \brief Exchange `glob.inv_index` with `inner_inv_index`.

    Also calls `force_update()` so the next `DomainVecSet` rewrites all
    inputs. Calling it a second time swaps the two vectors back.
*/
1088  void SwapInner() {
1089  std::swap(glob.inv_index, inner_inv_index);
1090  force_update();
1091  }
/** \brief Exchange `glob.inv_index` with `outer_inv_index`.

    Also calls `force_update()` so the next `DomainVecSet` rewrites all
    inputs. Calling it a second time swaps the two vectors back.
*/
1095  void SwapOuter() {
1096  std::swap(glob.inv_index, outer_inv_index);
1097  force_update();
1098  }
1101  return (DomainInner() > 0) || (DomainOuter() > 0);
1102  }
/** \brief Boolean mask over the current domain selecting outer parameters.

    \return Vector of length `Domain()`; entry `i` is taken from the mark
    of variable `glob.inv_index[i]`, where the marks are built from
    `outer_inv_index` by `glob.mark_space`.
*/
1104  std::vector<bool> DomainOuterMask() {
1105  std::vector<bool> mark_outer =
1106  glob.mark_space(glob.values.size(), outer_inv_index);
1107  return subset(mark_outer, glob.inv_index);
1108  }
/** \brief Set the inner/outer index vectors of `ans` from an outer mask.

    Does nothing unless this object has an inner/outer split in use.

    \param ans Function object whose `inner_inv_index` / `outer_inv_index`
    are overwritten.
    \param outer_mask Mask over the leading inputs of `ans`; it is resized
    to `ans.Domain()`, padding with `false` (i.e. inner).
*/
1116  void set_inner_outer(ADFun &ans, const std::vector<bool> &outer_mask) {
1117  if (inner_outer_in_use()) {
1118  std::vector<bool> mark(outer_mask);
1119  mark.resize(ans.Domain(), false);
1120 
1121  ans.outer_inv_index = subset(ans.glob.inv_index, mark);
1122 
      // Complement of the outer mask selects the inner parameters.
1123  mark.flip();
1124 
1125  ans.inner_inv_index = subset(ans.glob.inv_index, mark);
1126  }
1127  }
/** \brief Copy this object's inner/outer split onto `ans`.

    Uses `DomainOuterMask()` of this object as the mask. Does nothing when
    no inner/outer split is in use.
*/
1128  void set_inner_outer(ADFun &ans) {
1129  if (inner_outer_in_use()) {
1130  set_inner_outer(ans, DomainOuterMask());
1131  }
1132  }
/** \brief Remove independent variables from the domain.

    \param inv_keep Mask over the current inputs; only entries marked
    `true` remain in `glob.inv_index`. The inner/outer split is recomputed
    for the reduced domain.
*/
1133  void DomainReduce(const std::vector<bool> &inv_keep) {
      // The outer mask must be taken before 'glob.inv_index' is modified.
1134  std::vector<bool> outer_mask = DomainOuterMask();
1135  outer_mask = subset(outer_mask, inv_keep);
1136  glob.inv_index = subset(glob.inv_index, inv_keep);
1137  set_inner_outer(*this, outer_mask);
1138  }
/** \brief Replace the given operators by `NullOp2` placeholders.

    Each replacement is created with the same `input_size()` and
    `output_size()` as the operator it replaces; the old operator is then
    deallocated.

    \param nodes Operator indices into `glob.opstack`.
*/
1144  void inactivate(std::vector<Index> nodes) {
1145  for (size_t i = 0; i < nodes.size(); i++) {
1146  OperatorPure *op = glob.opstack[nodes[i]];
1147  glob.opstack[nodes[i]] = glob.getOperator<global::NullOp2>(
1148  op->input_size(), op->output_size());
1149  op->deallocate();
1150  }
1151  }
1152 };
/** \brief Build an `ADFun` wrapping a functor in a retaping atomic operator.

    Constructs an `AtomOp` over a `retaping_derivative_table` from `F`, `x`
    and `test`, and tapes that operator at `x`.

    \param F Functor to wrap.
    \param x Point used to construct the operator and to record the tape.
    \param test Test object forwarded to the derivative table
    (default-constructed `ParametersChanged` unless given).
*/
1164 template <class Functor, class Test = ParametersChanged>
1165 ADFun<> ADFun_retaping(Functor &F, const std::vector<ad_aug> &x,
1166  Test test = Test()) {
1167  typedef retaping_derivative_table<Functor, ADFun<>, Test> DTab;
1168  global::Complete<AtomOp<DTab> > Op(F, x, test);
1169  return ADFun<>(Op, x);
1170 }
1171 
1173 template <class dummy = void>
1175  ADFun<> Fp;
1176  ADFun_packed(const ADFun<> &Fp) : Fp(Fp) {}
1177  ADFun_packed() {}
1178  ad_segment operator()(const std::vector<ad_segment> &x) {
1179  std::vector<ad_segment> xp(x.size());
1180  for (size_t i = 0; i < xp.size(); i++) xp[i] = pack(x[i]);
1181  std::vector<ad_aug> yp = Fp(concat(xp));
1182  return unpack(yp, 0);
1183  }
1184  bool initialized() { return Fp.Domain() != 0; }
1185 };
1193 template <class Functor, class Test>
1194 ADFun_packed<> ADFun_retaping(Functor &F, const std::vector<ad_segment> &x,
1195  Test test) {
1196  static const bool packed = true;
1198  packed>
1199  DTab;
1200  PackWrap<Functor> Fp(F);
1201  std::vector<ad_segment> xp(x.size());
1202  for (size_t i = 0; i < xp.size(); i++) xp[i] = pack(x[i]);
1203  std::vector<ad_aug> xp_ = concat(xp);
1204  PackWrap<Test> testp(test);
1205  global::Complete<AtomOp<DTab> > Op(Fp, xp_, testp);
1206  ADFun<> TapeFp(Op, xp_);
1207  return ADFun_packed<>(TapeFp);
1208 }
1209 
/** \brief Function object with sparse-matrix index information attached.

    Extends `ADFun` with row indices `i`, column indices `j` and the
    matrix dimensions `m` (rows) and `n` (columns). Output number `k` of
    the tape corresponds to matrix entry `(i[k], j[k])`.

    The dimensions are zero-initialized so that a default-constructed
    object never exposes indeterminate values (e.g. to
    `transpose_inplace`).
*/
1210 template <class ADFun>
1211 struct Sparse : ADFun {
1212  std::vector<Index> i;
1213  std::vector<Index> j;
1214  Index m;
1215  Index n;
1216  Sparse() : m(0), n(0) {}
1217  Sparse(const ADFun &f) : ADFun(f), m(0), n(0) {}
      /** \brief Convert a valarray to a vector (safe for empty input). */
1218  std::vector<Index> a2v(const std::valarray<Index> &x) const {
1219  return std::vector<Index>(std::begin(x), std::end(x));
1220  }
      /** \brief Convert a vector to a valarray. */
1221  std::valarray<Index> v2a(const std::vector<Index> &x) const {
1222  return std::valarray<Index>(x.data(), x.size());
1223  }
      /** \brief Row indices as a valarray. */
1224  std::valarray<Index> row() const { return v2a(i); }
      /** \brief Column indices as a valarray. */
1225  std::valarray<Index> col() const { return v2a(j); }
      /** \brief Keep only the entries selected by the mask `x`.

          Applies the same mask to `i`, `j` and `glob.dep_index`.
      */
1226  void subset_inplace(const std::valarray<bool> &x) {
1227  i = a2v(row()[x]);
1228  j = a2v(col()[x]);
1229  this->glob.dep_index = a2v(v2a(this->glob.dep_index)[x]);
1230  }
      /** \brief Transpose by swapping row/column indices and dimensions. */
1231  void transpose_inplace() {
1232  std::swap(i, j);
1233  std::swap(m, n);
1234  }
1235 };
1236 
1243 template <class ADFun>
1244 struct Decomp2 : std::pair<ADFun, ADFun> {
1245  struct composition {
1246  typedef ad_aug ad;
1247  const ADFun &f;
1248  const ADFun &g;
1249  composition(const ADFun &f, const ADFun &g) : f(f), g(g) {}
1250  std::vector<ad> operator()(std::vector<ad> x) {
1251  std::vector<ad> y = g(x);
1252  x.insert(x.end(), y.begin(), y.end());
1253  return f(x);
1254  }
1255  };
1256  operator ADFun() {
1257  ADFun &g = this->first;
1258  ADFun &f = this->second;
1259  composition fg(f, g);
1260  return ADFun(fg, g.DomainVec());
1261  }
1285  Decomp3<ADFun> HesFun(std::vector<bool> keep_rc = std::vector<bool>(0),
1286  bool sparse_1 = true, bool sparse_2 = true,
1287  bool sparse_3 = true) {
1288  ADFun &g = this->first;
1289  ADFun &f = this->second;
1290  Decomp3<ADFun> ans;
1291  TMBAD_ASSERT(f.Range() == 1);
1292 
1293  std::vector<bool> keep_f = std::vector<bool>(f.Range(), true);
1294  std::vector<bool> keep_g = std::vector<bool>(g.Range(), true);
1295 
1296  typedef ad_aug ad;
1297  global &glob = ans.first.glob;
1298  glob.ad_start();
1299  std::vector<Scalar> x_ = f.DomainVec();
1300  size_t k = g.Range();
1301  size_t n = f.Domain() - k;
1302 
1303  std::vector<bool> mask_x(f.Domain(), false);
1304  for (size_t i = 0; i < n; i++) mask_x[i] = true;
1305  std::vector<bool> mask_s(mask_x);
1306  mask_s.flip();
1307 
1308  std::vector<ad> x(x_.begin(), x_.end() - k);
1309  Independent(x);
1310  std::vector<ad> s = g(x);
1311  std::vector<ad> s0(s.size());
1312 
1313  for (size_t i = 0; i < s.size(); i++) s0[i] = s[i].copy0();
1314  std::vector<ad> xs(x);
1315  xs.insert(xs.end(), s.begin(), s.end());
1316  std::vector<ad> xs0(x);
1317  xs0.insert(xs0.end(), s0.begin(), s0.end());
1318  if (false) {
1319  TMBAD_ASSERT(keep_rc.size() == n || keep_rc.size() == 0);
1320  std::vector<bool> keep_xy(keep_rc);
1321  keep_xy.resize(f.Domain(), true);
1322  ADFun f_grad = f.JacFun(keep_xy, keep_f);
1323  }
1324  ADFun f_grad = f.JacFun();
1325  std::vector<ad> z = subset(f_grad(xs), mask_x);
1326  std::vector<ad> z0 = subset(f_grad(xs0), mask_s);
1327  std::vector<ad> xw(x);
1328  xw.insert(xw.end(), z0.begin(), z0.end());
1329  std::vector<ad> z1 = g.WgtJacFun()(xw);
1330  for (size_t i = 0; i < n; i++) z[i] += z1[i];
1331  Dependent(z);
1332  glob.ad_stop();
1333  glob.eliminate();
1334  ans.first.glob = glob;
1335 
1336  if (sparse_1) {
1337  ans.first = ans.first.SpJacFun(keep_rc, keep_rc);
1338  } else {
1339  ans.first = ans.first.JacFun(keep_rc, keep_rc);
1340  }
1341  ans.first.glob.eliminate();
1342  f.set_inner_outer(ans.first);
1343 
1344  if (sparse_2) {
1345  ans.second = g.SpJacFun(keep_rc);
1346  } else {
1347  ans.second = g.JacFun(keep_rc);
1348  }
1349  ans.second.glob.eliminate();
1350 
1351  Sparse<ADFun> B;
1352  if (sparse_3) {
1353  B = f_grad.SpJacFun(mask_s, mask_s);
1354  } else {
1355  B = f_grad.JacFun(mask_s, mask_s);
1356  }
1357  ans.third.glob.ad_start();
1358  std::vector<ad> xx(x_.begin(), x_.end() - k);
1359  Independent(xx);
1360  s = g(xx);
1361  xs = xx;
1362  xs.insert(xs.end(), s.begin(), s.end());
1363  z = B(xs);
1364  Dependent(z);
1365  ans.third.glob.ad_stop();
1366  ans.third.glob.eliminate();
1367  ans.third.i = B.i;
1368  ans.third.j = B.j;
1369  f.set_inner_outer(ans.third);
1370 
1371  return ans;
1372  }
1373 };
1374 
/** \brief Triple of sparse function objects.

    Extends `Decomp2<Sparse<ADFun> >` (members `first` and `second`) with
    a third member; this is the return type of `Decomp2::HesFun`.
*/
1384 template <class ADFun>
1385 struct Decomp3 : Decomp2<Sparse<ADFun> > {
1386  Sparse<ADFun> third;
1387 };
1388 
1389 } // namespace TMBad
1390 #endif // HAVE_TMBAD_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+
1 #ifndef HAVE_TMBAD_HPP
2 #define HAVE_TMBAD_HPP
3 // Autogenerated - do not edit by hand !
4 #include "checkpoint.hpp"
5 #include "global.hpp"
6 #include "graph_transform.hpp"
7 
8 namespace TMBad {
9 
10 template <class ADFun>
11 struct Sparse;
12 template <class ADFun>
13 struct Decomp2;
14 template <class ADFun>
15 struct Decomp3;
16 
17 namespace {
18 
/** \brief Exclusive cumulative sum of a boolean vector.

    \return Vector `y` of the same length as `x` with `y[0] = 0` and
    `y[i] = x[0] + ... + x[i-1]`, i.e. the number of `true` entries
    strictly before position `i`.
*/
19 template <class I>
20 std::vector<I> cumsum0(const std::vector<bool> &x) {
21  std::vector<I> y(x.size(), 0);
22  for (size_t i = 1; i < x.size(); i++) {
23  y[i] = y[i - 1] + x[i - 1];
24  }
25  return y;
26 }
27 } // namespace
28 
/** \brief Adaptor giving a functor a `std::vector<T>` interface.

    The wrapped functor `F` is called with an `InterfaceVector` and may
    return either an `InterfaceVector` or a single `Scalar`; a scalar
    result is converted to a vector of length one.

    The functor is held by reference, so it must outlive the wrapper.
*/
56 template <class Functor, class InterfaceVector>
57 struct StdWrap {
58  Functor &F;
59  typedef typename InterfaceVector::value_type Scalar;
    // 'tovec' normalizes the functor result to an InterfaceVector.
60  InterfaceVector tovec(const InterfaceVector &x) { return x; }
61  InterfaceVector tovec(const Scalar &x) {
62  InterfaceVector y(1);
63  y[0] = x;
64  return y;
65  }
66  StdWrap(Functor &F) : F(F) {}
    // Requires InterfaceVector to be constructible from std::vector<T>
    // and std::vector<T> to be constructible from InterfaceVector.
67  template <class T>
68  std::vector<T> operator()(const std::vector<T> &x) {
69  InterfaceVector xi(x);
70  InterfaceVector yi = tovec(F(xi));
71  std::vector<T> y(yi);
72  return y;
73  }
74 };
75 
79  bool compress;
80  bool index_remap;
81 };
82 
116 template <class ad = ad_aug>
117 struct ADFun {
118  global glob;
119 
/** \brief Tape a vector-valued functor.

    \param F Functor called once as `F(x)` with `x` a `std::vector<ad>` of
    independent variables; its result is marked dependent.
    \param x_ Point at which the tape is recorded; only `x_.size()` and
    `x_[i]` are used.

    Asserts that the active tape (`get_glob()`) is the same before and
    after taping.
*/
121  template <class Functor, class ScalarVector>
122  ADFun(Functor F, const ScalarVector &x_) : force_update_flag(false) {
123  std::vector<ad> x(x_.size());
124  for (size_t i = 0; i < x.size(); i++) x[i] = Value(x_[i]);
125  global *glob_begin = get_glob();
126  this->glob.ad_start();
127  Independent(x);
128  std::vector<ad> y = F(x);
129  Dependent(y);
130  this->glob.ad_stop();
131  global *glob_end = get_glob();
132  TMBAD_ASSERT(glob_begin == glob_end);
133  }
134 
/** \brief Tape a functor of one scalar argument returning one scalar.

    \param F Functor called once as `F(x0)` with `x0` an independent `ad`.
    \param x0_ Value at which the tape is recorded.
*/
138  template <class Functor>
139  ADFun(Functor F, Scalar x0_) : force_update_flag(false) {
140  global *glob_begin = get_glob();
141  this->glob.ad_start();
142  ad x0(x0_);
143  x0.Independent();
144  ad y0 = F(x0);
145  y0.Dependent();
146  this->glob.ad_stop();
147  global *glob_end = get_glob();
148  TMBAD_ASSERT(glob_begin == glob_end);
149  }
150 
/** \brief Tape a functor of two scalar arguments returning one scalar.

    \param F Functor called once as `F(x0, x1)` with independent `ad`
    arguments.
    \param x0_ Value of the first argument at which the tape is recorded.
    \param x1_ Value of the second argument at which the tape is recorded.
*/
154  template <class Functor>
155  ADFun(Functor F, Scalar x0_, Scalar x1_) : force_update_flag(false) {
156  global *glob_begin = get_glob();
157  this->glob.ad_start();
158  ad x0(x0_);
159  x0.Independent();
160  ad x1(x1_);
161  x1.Independent();
162  ad y0 = F(x0, x1);
163  y0.Dependent();
164  this->glob.ad_stop();
165  global *glob_end = get_glob();
166  TMBAD_ASSERT(glob_begin == glob_end);
167  }
168 
/** \brief Construct an empty function object (no tape recorded). */
169  ADFun() : force_update_flag(false) {}
170 
     // Thin wrappers forwarding to the member of the same name on 'glob'.
171  void forward() { glob.forward(); }
172  void reverse() { glob.reverse(); }
173  void clear_deriv() { glob.clear_deriv(); }
     // Mutable access to the derivative of the i'th input / output.
174  Scalar &deriv_inv(Index i) { return glob.deriv_inv(i); }
175  Scalar &deriv_dep(Index i) { return glob.deriv_dep(i); }
176 
/** \brief Print the tape; forwards `cfg` to `glob.print`. */
178  void print(print_config cfg = print_config()) { glob.print(cfg); }
179 
/** \brief Forward to `glob.eliminate()`. */
181  void eliminate() { glob.eliminate(); }
182 
/** \brief Optimize the tape in place.

    Must not be called when independent variable positions are cached
    (`inv_pos` non-empty), as asserted below. If an inner/outer split is
    in use, the outer mask is captured first and the split is restored
    afterwards.

    NOTE(review): this listing skips upstream line 205, located between
    the mask capture and `glob.eliminate()`; check the original header
    for the statement on that line.
*/
195  void optimize() {
196  TMBAD_ASSERT2(inv_pos.size() == 0,
197  "Tape has 'cached independent variable positions' which "
198  "would be invalidated by the optimizer");
199 
200  std::vector<bool> outer_mask;
201  if (inner_outer_in_use()) {
202  outer_mask = DomainOuterMask();
203  }
204 
206 
207  glob.eliminate();
208 
209  if (inner_outer_in_use()) {
210  TMBAD_ASSERT(outer_mask.size() == Domain());
211  set_inner_outer(*this, outer_mask);
212  }
213  }
223  std::vector<Position> pos = inv_positions(glob);
224  inv_pos = subset(pos, invperm(order(glob.inv_index)));
225  }
/** \brief Reorder the computational graph via `reorder_graph(glob, last)`.

    The inner/outer split (if in use) is captured before and restored
    after the reordering, and the cached independent variable positions
    are recomputed with `set_inv_positions()`.

    \param last Forwarded unchanged to `reorder_graph`.
*/
237  void reorder(std::vector<Index> last) {
238  std::vector<bool> outer_mask;
239  if (inner_outer_in_use()) {
240  outer_mask = DomainOuterMask();
241  }
242  reorder_graph(glob, last);
243 
244  if (inner_outer_in_use()) {
245  TMBAD_ASSERT(outer_mask.size() == Domain());
246  set_inner_outer(*this, outer_mask);
247  }
248  set_inv_positions();
249  }
250 
/** \brief Number of independent variables (`glob.inv_index.size()`). */
251  size_t Domain() const { return glob.inv_index.size(); }
/** \brief Number of dependent variables (`glob.dep_index.size()`). */
252  size_t Range() const { return glob.dep_index.size(); }
/** \brief Mask of inputs reached from the outputs.

    Marks all dependent variables, runs `glob.reverse(mark)` on the marks
    and returns the resulting marks at the independent variables.

    \return Boolean vector of length `Domain()`.
*/
254  std::vector<bool> activeDomain() {
255  std::vector<bool> mark(glob.values.size(), false);
256  for (size_t i = 0; i < glob.dep_index.size(); i++)
257  mark[glob.dep_index[i]] = true;
258  glob.reverse(mark);
259  return subset(mark, glob.inv_index);
260  }
/** \brief Mask of outputs reached from the inputs.

    Marks all independent variables, runs `glob.forward(mark)` on the
    marks and returns the resulting marks at the dependent variables.

    \return Boolean vector of length `Range()`.
*/
262  std::vector<bool> activeRange() {
263  std::vector<bool> mark(glob.values.size(), false);
264  for (size_t i = 0; i < glob.inv_index.size(); i++)
265  mark[glob.inv_index[i]] = true;
266  glob.forward(mark);
267  return subset(mark, glob.dep_index);
268  }
/** \brief Current values of the independent variables.

    \return Vector of length `Domain()` with entry `i` equal to
    `glob.value_inv(i)`.
*/
270  std::vector<Scalar> DomainVec() {
271  std::vector<Scalar> xd(Domain());
272  for (size_t i = 0; i < xd.size(); i++) xd[i] = glob.value_inv(i);
273  return xd;
274  }
277  return IndirectAccessor<Scalar>(glob.values, glob.dep_index);
278  }
/** \brief Mask of tape variables relevant for selected inputs and outputs.

    If both masks are empty every variable is kept. Otherwise an empty
    mask is treated as "all true", and a variable is kept only if it is
    marked by both the forward pass and the reverse pass, each started
    from the union of the selected inputs and selected outputs.

    \param keep_x Empty, or mask of length `Domain()`.
    \param keep_y Empty, or mask of length `Range()`.
    \return Boolean vector of length `glob.values.size()`.
*/
280  std::vector<bool> get_keep_var(std::vector<bool> keep_x,
281  std::vector<bool> keep_y) {
282  std::vector<bool> keep_var(glob.values.size(), true);
283  if (keep_x.size() > 0 || keep_y.size() > 0) {
284  if (keep_x.size() == 0) keep_x.resize(glob.inv_index.size(), true);
285  if (keep_y.size() == 0) keep_y.resize(glob.dep_index.size(), true);
286  TMBAD_ASSERT(keep_x.size() == glob.inv_index.size());
287  TMBAD_ASSERT(keep_y.size() == glob.dep_index.size());
288 
     // Seed: the selected inputs together with the selected outputs.
289  std::vector<bool> keep_var_init(keep_var.size(), false);
290  for (size_t i = 0; i < glob.inv_index.size(); i++)
291  if (keep_x[i]) keep_var_init[glob.inv_index[i]] = true;
292  for (size_t i = 0; i < glob.dep_index.size(); i++)
293  if (keep_y[i]) keep_var_init[glob.dep_index[i]] = true;
294 
295  std::vector<bool> keep_var_x = keep_var_init;
296  glob.forward(keep_var_x);
297 
298  std::vector<bool> keep_var_y = keep_var_init;
299  glob.reverse(keep_var_y);
300 
     // Keep only variables marked by both passes.
301  for (size_t i = 0; i < keep_var.size(); i++)
302  keep_var[i] = keep_var_x[i] && keep_var_y[i];
303  }
304  return keep_var;
305  }
     // Cached tape positions of the independent variables; empty when no
     // positions are cached (see the checks on 'inv_pos.size()').
313  std::vector<Position> inv_pos;
/** \brief Look up the cached position of an independent variable.

    \param inv Variable index compared against `inv_pos[i].ptr.second`.
    \return The first matching cached position, or `Position(0, 0, 0)`
    if none matches (linear search).
*/
315  Position find_pos(Index inv) {
316  for (size_t i = 0; i < inv_pos.size(); i++) {
317  if (inv_pos[i].ptr.second == inv) return inv_pos[i];
318  }
319  return Position(0, 0, 0);
320  }
     // Tape position passed to 'clear_deriv' / 'reverse' by the Jacobian
     // routines; set by 'set_tail' and reset by 'unset_tail'.
325  Position tail_start;
331  if (glob.inv_index.size() == 0) return true;
332 
333  bool is_sorted = (inv_pos.size() == 0 && !inner_outer_in_use());
334  return is_sorted && (glob.inv_index.size() ==
335  1 + glob.inv_index.back() - glob.inv_index.front());
336  }
/** \brief Set `tail_start` to the earliest cached position among `random`.

    When no positions are cached (`inv_pos` empty) the tail starts at
    `Position(0, 0, 0)`.

    \param random Indices into `inv_pos` selecting the inputs of interest.

    NOTE(review): if positions are cached and `random` is empty,
    `std::min_element` returns `pos.end()` and is dereferenced; callers
    presumably never pass an empty vector here — confirm.
*/
339  void set_tail(const std::vector<Index> &random) {
340  if (inv_pos.size() > 0) {
341  std::vector<Position> pos = subset(inv_pos, random);
342  tail_start = *std::min_element(pos.begin(), pos.end());
343  } else {
344  tail_start = Position(0, 0, 0);
345  }
346  }
/** \brief Reset `tail_start` to `Position(0, 0, 0)`. */
349  void unset_tail() { tail_start = Position(0, 0, 0); }
/** \brief Request that the next `DomainVecSet` rewrites all inputs. */
351  void force_update() { force_update_flag = true; }
     // Set by 'force_update'; cleared again inside 'DomainVecSet'.
352  bool force_update_flag;
/** \brief Write new input values and report where forward evaluation
    must restart.

    \param x New input values; `x.size()` must equal `Domain()`.
    \return
    - `Position(0, 0, 0)` if a forced update was requested, or if no
      positions are cached and some value changed (or `x` is empty);
    - `glob.end()` if no value changed;
    - with cached positions (`inv_pos` non-empty): the cached position of
      the changed input with the smallest variable index, or, when an
      inner/outer split is in use, the position of the smallest index in
      `glob.inv_index` regardless of what changed.
*/
354  template <class InplaceVector>
355  Position DomainVecSet(const InplaceVector &x) {
356  TMBAD_ASSERT(x.size() == Domain());
357  if (force_update_flag) {
358  for (size_t i = 0; i < x.size(); i++) glob.value_inv(i) = x[i];
359  force_update_flag = false;
360  return Position(0, 0, 0);
361  }
362  if (inv_pos.size() > 0) {
363  if (inner_outer_in_use()) {
364  for (size_t i = 0; i < x.size(); i++) glob.value_inv(i) = x[i];
      // NOTE(review): dereferences 'min_element' of 'glob.inv_index',
      // which presumably is non-empty on this path — confirm.
365  Index min_inv =
366  *std::min_element(glob.inv_index.begin(), glob.inv_index.end());
367  return find_pos(min_inv);
368  }
369  TMBAD_ASSERT(inv_pos.size() == Domain());
      // (size_t)-1 acts as the "nothing changed" sentinel.
370  size_t min_var_changed = -1;
371  size_t i_min = -1;
372  for (size_t i = 0; i < x.size(); i++) {
373  if (glob.value_inv(i) != x[i] && glob.inv_index[i] < min_var_changed) {
374  min_var_changed = glob.inv_index[i];
375  i_min = i;
376  }
377  glob.value_inv(i) = x[i];
378  }
379  if (min_var_changed == (size_t)-1)
380  return glob.end();
381  else
382  return inv_pos[i_min];
383  }
384  if (x.size() > 0) {
      // Without cached positions: either nothing changed, or restart
      // from the beginning of the tape.
385  bool no_change = true;
386  for (size_t i = 0; i < x.size(); i++) {
387  if (glob.value_inv(i) != x[i]) {
388  no_change = false;
389  break;
390  }
391  }
392  if (no_change) return glob.end();
393 
394  for (size_t i = 0; i < x.size(); i++) glob.value_inv(i) = x[i];
395  }
396  return Position(0, 0, 0);
397  }
/** \brief Full forward sweep for a generic vector type.

    \param x Input values; `x.size()` must equal `Domain()`.
    \return Vector of length `Range()` holding the output values.
*/
399  template <class Vector>
400  Vector forward(const Vector &x) {
401  TMBAD_ASSERT((size_t)x.size() == Domain());
402  for (size_t i = 0; i < (size_t)x.size(); i++) glob.value_inv(i) = x[i];
403  glob.forward();
404  Vector y(Range());
405  for (size_t i = 0; i < (size_t)y.size(); i++) y[i] = glob.value_dep(i);
406  return y;
407  }
/** \brief Full reverse sweep for a generic vector type.

    Clears all derivatives, seeds the outputs with `w`, runs
    `glob.reverse()` and reads the input derivatives. Uses the values
    currently stored on the tape (no forward sweep is run here).

    \param w Output weights; `w.size()` must equal `Range()`.
    \return Vector of length `Domain()` holding the input derivatives.
*/
409  template <class Vector>
410  Vector reverse(const Vector &w) {
411  TMBAD_ASSERT((size_t)w.size() == Range());
412  glob.clear_deriv();
413  for (size_t i = 0; i < (size_t)w.size(); i++) glob.deriv_dep(i) = w[i];
414  glob.reverse();
415  Vector d(Domain());
416  for (size_t i = 0; i < (size_t)d.size(); i++) d[i] = glob.deriv_inv(i);
417  return d;
418  }
/** \brief Evaluate the function at `x`.

    Inputs are written with `DomainVecSet`, and the forward sweep starts
    from the position it returns.

    \return Output values (`RangeVec()`).
*/
420  std::vector<Scalar> operator()(const std::vector<Scalar> &x) {
421  Position start = DomainVecSet(x);
422  glob.forward(start);
423  return RangeVec();
424  }
425 
/** \brief Evaluate the function with inputs read from a segment reference.

    Same steps as the `std::vector<Scalar>` overload; the result is
    returned as the `IndirectAccessor` produced by `RangeVec()`.
*/
426  IndirectAccessor<Scalar> operator()(
427  const segment_ref<ForwardArgs<Scalar>, x_read> &x) {
428  Position start = DomainVecSet(x);
429  glob.forward(start);
430  return RangeVec();
431  }
/** \brief Evaluate this function on taped inputs, recording the
    operations on the currently active tape (`get_glob()`).

    Every input is first put on the tape with `addToTape()`; all inputs
    must then live on the active tape (asserted). This tape is replayed
    onto the active one with the given inputs.

    \param x_ Inputs; `x_.size()` must equal `Domain()`.
    \return Outputs, of length `Range()`.
*/
437  std::vector<ad> operator()(const std::vector<ad> &x_) const {
438  std::vector<ad> x(x_.begin(), x_.end());
439  TMBAD_ASSERT(x.size() == Domain());
440  for (size_t i = 0; i < x.size(); i++) {
441  x[i].addToTape();
442  }
443  global *cur_glob = get_glob();
444  for (size_t i = 0; i < x.size(); i++) {
445  TMBAD_ASSERT(x[i].on_some_tape());
446  TMBAD_ASSERT(x[i].glob() == cur_glob);
447  }
448  global::replay replay(this->glob, *get_glob());
449  replay.start();
450  for (size_t i = 0; i < this->Domain(); i++) {
451  replay.value_inv(i) = x[i];
452  }
453  replay.forward(false, false);
454  std::vector<ad> y(this->Range());
455  for (size_t i = 0; i < this->Range(); i++) {
456  y[i] = replay.value_dep(i);
457  }
458  replay.stop();
459  return y;
460  }
/** \brief Evaluate a one-input, one-output function on a taped scalar.

    Asserts `Domain() == 1` and `Range() == 1`, then delegates to the
    vector overload.
*/
463  ad operator()(ad x0) {
464  TMBAD_ASSERT(Domain() == 1);
465  TMBAD_ASSERT(Range() == 1);
466  std::vector<ad> x(1);
467  x[0] = x0;
468  return (*this)(x)[0];
469  }
/** \brief Evaluate a two-input, one-output function on taped scalars.

    Asserts `Domain() == 2` and `Range() == 1`, then delegates to the
    vector overload.
*/
472  ad operator()(ad x0, ad x1) {
473  TMBAD_ASSERT(Domain() == 2);
474  TMBAD_ASSERT(Range() == 1);
475  std::vector<ad> x(2);
476  x[0] = x0;
477  x[1] = x1;
478  return (*this)(x)[0];
479  }
/** \brief Evaluate the dense Jacobian at `x`.

    One reverse sweep is run per output, each limited by `tail_start`.

    \param x Input values.
    \return Vector of length `Domain() * Range()`; the derivative of
    output `j` with respect to input `k` is stored at `j * Domain() + k`.
*/
484  std::vector<Scalar> Jacobian(const std::vector<Scalar> &x) {
485  Position start = DomainVecSet(x);
486  glob.forward(start);
487  std::vector<Scalar> ans(Domain() * Range());
488  for (size_t j = 0; j < Range(); j++) {
489  glob.clear_deriv(tail_start);
490  glob.deriv_dep(j) = 1;
491  glob.reverse(tail_start);
492  for (size_t k = 0; k < Domain(); k++)
493  ans[j * Domain() + k] = glob.deriv_inv(k);
494  }
495  return ans;
496  }
/** \brief Evaluate a subset of the Jacobian at `x` using sub-graphs.

    For each selected output, the reverse sweep is restricted to the
    sub-graph found from that output in the reverse graph.

    \param x Input values.
    \param keep_x Mask of length `Domain()` selecting columns. An empty
    mask selects all columns.
    \param keep_y Mask of length `Range()` selecting rows. An empty mask
    selects all rows.
    \return Selected entries, one selected row after another; within a
    row the selected columns appear in increasing order.
*/
503  std::vector<Scalar> Jacobian(const std::vector<Scalar> &x,
504  std::vector<bool> keep_x,
505  std::vector<bool> keep_y) {
506  std::vector<Scalar> ans;
507 
     // An empty mask means "keep everything", the same convention used by
     // 'get_keep_var', 'JacFun_' and 'SpJacFun'. Without this, an empty
     // mask selected nothing and the result was always empty.
     if (keep_x.size() == 0) keep_x.resize(Domain(), true);
     if (keep_y.size() == 0) keep_y.resize(Range(), true);

508  std::vector<bool> keep_var = get_keep_var(keep_x, keep_y);
509 
510  graph G = this->glob.reverse_graph(keep_var);
511 
512  std::vector<size_t> which_keep_x = which(keep_x);
513  std::vector<size_t> which_keep_y = which(keep_y);
514 
515  Position start = DomainVecSet(x);
516  glob.forward(start);
517 
518  for (size_t w = 0; w < which_keep_y.size(); w++) {
519  size_t k = which_keep_y[w];
520 
     // Sub-graph of operators reached from output k.
521  glob.subgraph_seq.resize(0);
522  glob.subgraph_seq.push_back(G.dep2op[k]);
523  G.search(glob.subgraph_seq);
524 
     // Selected inputs are zeroed explicitly in addition to
     // 'clear_deriv_sub', then output k is seeded.
525  glob.clear_deriv_sub();
526  for (size_t l = 0; l < which_keep_x.size(); l++)
527  glob.deriv_inv(which_keep_x[l]) = Scalar(0);
528  glob.deriv_dep(k) = 1.;
529  glob.reverse_sub();
530 
531  for (size_t l = 0; l < which_keep_x.size(); l++) {
532  ans.push_back(glob.deriv_inv(which_keep_x[l]));
533  }
534  }
535  return ans;
536  }
/** \brief Evaluate the weighted Jacobian (vector-Jacobian product).

    Runs a forward sweep at `x`, seeds the outputs with `w` and runs one
    full reverse sweep.

    \param x Input values; size must equal `Domain()`.
    \param w Output weights; size must equal `Range()`.
    \return Input derivatives, one per independent variable.
*/
542  std::vector<Scalar> Jacobian(const std::vector<Scalar> &x,
543  const std::vector<Scalar> &w) {
544  TMBAD_ASSERT(x.size() == Domain());
545  TMBAD_ASSERT(w.size() == Range());
546  Position start = DomainVecSet(x);
547  glob.forward(start);
548  glob.clear_deriv();
549  for (size_t j = 0; j < Range(); j++) glob.deriv_dep(j) = w[j];
550  glob.reverse();
551  return IndirectAccessor<Scalar>(glob.derivs, glob.inv_index);
552  }
553 
/** \brief Weighted Jacobian with inputs and weights read from segment
    references.

    Same steps as the `std::vector<Scalar>` overload; the result is an
    `IndirectAccessor` over `glob.derivs` indexed by `glob.inv_index`.
*/
554  IndirectAccessor<Scalar> Jacobian(
555  const segment_ref<ReverseArgs<Scalar>, x_read> &x,
556  const segment_ref<ReverseArgs<Scalar>, dy_read> &w) {
557  TMBAD_ASSERT(x.size() == Domain());
558  TMBAD_ASSERT(w.size() == Range());
559  Position start = DomainVecSet(x);
560  glob.forward(start);
561  glob.clear_deriv();
562  for (size_t j = 0; j < Range(); j++) glob.deriv_dep(j) = w[j];
563  glob.reverse();
564  return IndirectAccessor<Scalar>(glob.derivs, glob.inv_index);
565  }
/** \brief Weighted Jacobian on taped inputs, recorded on the active tape.

    Inputs and weights are put on the active tape (`get_glob()`); this
    tape is then replayed forward and in reverse onto it.

    \param x_ Inputs; size must equal `Domain()`.
    \param w_ Output weights; size must equal `Range()`.
    \return Taped input derivatives, of length `Domain()`.
*/
566  std::vector<ad> Jacobian(const std::vector<ad> &x_,
567  const std::vector<ad> &w_) {
568  std::vector<ad> x(x_.begin(), x_.end());
569  std::vector<ad> w(w_.begin(), w_.end());
570  global *cur_glob = get_glob();
571 
572  TMBAD_ASSERT(x.size() == Domain());
573  for (size_t i = 0; i < x.size(); i++) {
574  x[i].addToTape();
575  }
576  for (size_t i = 0; i < x.size(); i++) {
577  TMBAD_ASSERT(x[i].on_some_tape());
578  TMBAD_ASSERT(x[i].glob() == cur_glob);
579  }
580 
581  TMBAD_ASSERT(w.size() == Range());
582  for (size_t i = 0; i < w.size(); i++) {
583  w[i].addToTape();
584  }
585  for (size_t i = 0; i < w.size(); i++) {
586  TMBAD_ASSERT(w[i].on_some_tape());
587  TMBAD_ASSERT(w[i].glob() == cur_glob);
588  }
589 
590  global::replay replay(this->glob, *get_glob());
591  replay.start();
592  for (size_t i = 0; i < this->Domain(); i++) {
593  replay.value_inv(i) = x[i];
594  }
595  replay.forward(false, false);
596  replay.clear_deriv();
597  for (size_t i = 0; i < this->Range(); i++) {
598  replay.deriv_dep(i) = w[i];
599  }
600  replay.reverse(false, false);
601  std::vector<ad> dx(this->Domain());
602  for (size_t i = 0; i < dx.size(); i++) {
603  dx[i] = replay.deriv_inv(i);
604  }
605  replay.stop();
606  return dx;
607  }
/** \brief Shared implementation of `JacFun` and `WgtJacFun`.

    Replays this tape into a new tape `ans` and records reverse sweeps on
    it, marking the derivatives of the selected inputs as outputs.

    \tparam range_weight `false`: one reverse sweep per selected output
    (unit seed). `true`: a single reverse sweep, called as
    `replay.reverse(false, true, tail_start, keep)`.
    \param keep_x Mask over inputs; empty means all.
    \param keep_y Mask over outputs; empty means all.
    \return New tape, with the inner/outer split propagated.
*/
608  template <bool range_weight>
609  ADFun JacFun_(std::vector<bool> keep_x, std::vector<bool> keep_y) {
610  ADFun ans;
611  if (keep_x.size() == 0) keep_x.resize(Domain(), true);
612  if (keep_y.size() == 0) keep_y.resize(Range(), true);
613  std::vector<bool> keep = get_keep_var(keep_x, keep_y);
     // The reverse graph is only built for the unweighted case with more
     // than one output; otherwise G stays empty.
614  graph G;
615  if (!range_weight && Range() > 1) {
616  G = this->glob.reverse_graph(keep);
617  }
     // From here on 'keep' is a mask over operators, not variables.
618  keep = glob.var2op(keep);
619  global::replay replay(this->glob, ans.glob);
620  replay.start();
621  replay.forward(true, false);
622  if (!range_weight) {
623  if (G.empty()) {
     // No graph: ordinary reverse sweep per selected output.
624  for (size_t i = 0; i < this->Range(); i++) {
625  if (!keep_y[i]) continue;
626  replay.clear_deriv();
627  replay.deriv_dep(i) = 1.;
628  replay.reverse(false, false, tail_start, keep);
629  for (size_t j = 0; j < this->Domain(); j++) {
630  if (keep_x[j]) replay.deriv_inv(j).Dependent();
631  }
632  }
633  } else {
     // With graph: reverse sweep restricted to the sub-graph of output i.
634  replay.clear_deriv();
635  for (size_t i = 0; i < this->Range(); i++) {
636  if (!keep_y[i]) continue;
637  glob.subgraph_seq.resize(0);
638  glob.subgraph_seq.push_back(G.dep2op[i]);
639  G.search(glob.subgraph_seq);
640  replay.deriv_dep(i) = 1.;
641  replay.reverse_sub();
642  for (size_t j = 0; j < this->Domain(); j++) {
643  if (keep_x[j]) replay.deriv_inv(j).Dependent();
644  }
645  replay.clear_deriv_sub();
646  }
647  }
648  } else {
649  replay.clear_deriv();
650  replay.reverse(false, true, tail_start, keep);
651  for (size_t j = 0; j < this->Domain(); j++) {
652  if (keep_x[j]) replay.deriv_inv(j).Dependent();
653  }
654  }
655  replay.stop();
656  set_inner_outer(ans);
657  return ans;
658  }
/** \brief Tape of the dense Jacobian (`JacFun_<false>`).

    \param keep_x Mask over inputs; empty (default) means all.
    \param keep_y Mask over outputs; empty (default) means all.
*/
680  ADFun JacFun(std::vector<bool> keep_x = std::vector<bool>(0),
681  std::vector<bool> keep_y = std::vector<bool>(0)) {
682  return JacFun_<false>(keep_x, keep_y);
683  }
/** \brief Tape of the weighted Jacobian (`JacFun_<true>`).

    \param keep_x Mask over inputs; empty (default) means all.
    \param keep_y Mask over outputs; empty (default) means all.
*/
702  ADFun WgtJacFun(std::vector<bool> keep_x = std::vector<bool>(0),
703  std::vector<bool> keep_y = std::vector<bool>(0)) {
704  return JacFun_<true>(keep_x, keep_y);
705  }
709  std::vector<Scalar> x = DomainVec();
710  return ADFun(F, x);
711  }
/** \brief Split a scalar-valued function into one tape per thread.

    Requires `Range() == 1`. The tape is split with
    `accumulation_tree_split` and distributed by `autopar` with
    `do_aggregate = true` and `keep_all_inv = true`.

    \param num_threads Number of tapes to produce.
    \return Vector of `num_threads` function objects, one per
    `ap.vglob[i]`.
*/
717  std::vector<ADFun> parallel_accumulate(size_t num_threads) {
718  TMBAD_ASSERT(Range() == 1);
719  global glob_split = accumulation_tree_split(glob);
720  autopar ap(glob_split, num_threads);
721  ap.do_aggregate = true;
722  ap.keep_all_inv = true;
723  ap.run();
724  ap.extract();
725  std::vector<ADFun> ans(num_threads);
726  for (size_t i = 0; i < num_threads; i++) ans[i].glob = ap.vglob[i];
727  return ans;
728  }
/** \brief Build a single tape that evaluates this function through a
    `ParalOp` operator.

    Requires `Range() == 1`. Same splitting as `parallel_accumulate`, but
    with `keep_all_inv = false`; the `autopar` result is wrapped in a
    `ParalOp`, taped at `DomainVec()` and passed through `aggregate`.

    \param num_threads Number of threads given to `autopar`.
*/
732  ADFun parallelize(size_t num_threads) {
733  TMBAD_ASSERT(Range() == 1);
734  global glob_split = accumulation_tree_split(glob);
735  autopar ap(glob_split, num_threads);
736  ap.do_aggregate = true;
737  ap.keep_all_inv = false;
738  ap.run();
739  ap.extract();
740  global::Complete<ParalOp> f_parallel(ap);
741  ADFun F(f_parallel, DomainVec());
742  aggregate(F.glob);
743  return F;
744  }
/** \brief Replay the tape onto itself via `glob.forward_replay(true, true)`. */
750  void replay() { glob.forward_replay(true, true); }
/** \brief Tape of the sparse Jacobian.

    For every output `k`, the sub-graph reaching that output is searched
    in the reverse graph, the inputs found in the sub-graph become the
    column indices of row `k`, and the corresponding derivatives are
    recorded as outputs of the new tape. Output number `l` of the result
    is matrix entry `(ans.i[l], ans.j[l])`.

    \param keep_x Mask over inputs; empty (default) means all.
    \param keep_y Mask over outputs; empty (default) means all.
    \param config `compress` enables an experimental alternative that
    evaluates rows through an atomic weighted-Jacobian tape when a cost
    comparison favours it; `index_remap` renumbers rows/columns to the
    kept subset.
    \return Sparse function object with dimensions `m` x `n`.
*/
776  Sparse<ADFun> SpJacFun(std::vector<bool> keep_x = std::vector<bool>(0),
777  std::vector<bool> keep_y = std::vector<bool>(0),
778  SpJacFun_config config = SpJacFun_config()) {
779  ADFun atomic_jac_row;
780  std::vector<Index> rowcounts;
781 
782  Sparse<ADFun> ans;
783 
784  ans.m = Range();
785  ans.n = Domain();
786 
787  if (keep_x.size() == 0) keep_x.resize(Domain(), true);
788  if (keep_y.size() == 0) keep_y.resize(Range(), true);
789  std::vector<bool> keep_var = get_keep_var(keep_x, keep_y);
790 
791  size_t keep_x_count = std::count(keep_x.begin(), keep_x.end(), true);
792  size_t keep_y_count = std::count(keep_y.begin(), keep_y.end(), true);
793 
794  graph G = this->glob.reverse_graph(keep_var);
795 
796  global::replay replay(this->glob, ans.glob);
797  replay.start();
798  replay.forward(true, false);
799 
     // NA marks operators that are not independent variables.
800  Index NA = -1;
801  std::vector<Index> op2inv_idx = glob.op2idx(glob.inv_index, NA);
802 
     // From here on 'keep_var' is reused as per-row work space.
803  std::fill(keep_var.begin(), keep_var.end(), true);
804 
805  std::vector<Index> col_idx;
806  for (size_t k = 0; k < glob.dep_index.size(); k++) {
807  size_t i = glob.dep_index[k];
808 
     // Sub-graph of operators reached from output k.
809  glob.subgraph_seq.resize(0);
810  glob.subgraph_seq.push_back(G.dep2op[k]);
811  G.search(glob.subgraph_seq);
812 
813  bool do_compress = false;
814  if (config.compress) {
815  if (rowcounts.size() == 0) rowcounts = G.rowcounts();
816 
     // cost1: sum of row counts over the sub-graph;
     // cost2: fixed estimate for the compressed alternative.
817  size_t cost1 = 0;
818  for (size_t i = 0; i < glob.subgraph_seq.size(); i++) {
819  cost1 += rowcounts[glob.subgraph_seq[i]];
820  }
821 
822  size_t cost2 = Domain() + Range() + Domain();
823 
824  if (cost2 < cost1) do_compress = true;
825  }
826 
     // Boolean reverse pass on the sub-graph, seeded at variable i.
827  if (true) {
828  glob.clear_array_subgraph(keep_var);
829  keep_var[i] = true;
830  glob.reverse_sub(keep_var);
831  }
832 
     // Collect the inputs in the sub-graph that were marked by the pass.
833  col_idx.resize(0);
834  for (size_t l = 0; l < glob.subgraph_seq.size(); l++) {
835  Index idx = op2inv_idx[glob.subgraph_seq[l]];
836  if (idx != NA) {
837  Index nrep = glob.opstack[glob.subgraph_seq[l]]->output_size();
838  for (Index r = 0; r < nrep; r++) {
839  if (keep_var[glob.inv_index[idx]]) col_idx.push_back(idx);
840  idx++;
841  }
842  }
843  }
844 
     // Append row index k once per column found, then the columns.
845  ans.i.resize(ans.i.size() + col_idx.size(), k);
846  ans.j.insert(ans.j.end(), col_idx.begin(), col_idx.end());
847  if (!do_compress) {
848  replay.clear_deriv_sub();
849 
850  replay.deriv_dep(k) = 1.;
851 
852  replay.reverse_sub();
853 
854  } else {
     // The atomic row evaluator is built lazily on first use.
855  if (atomic_jac_row.Domain() == 0) {
856  Rcout << "Warning: This is an experimental compression method\n";
857  Rcout << "Disable: 'config(tmbad.sparse_hessian_compress=0)'\n";
858  atomic_jac_row = this->WgtJacFun(keep_x, keep_y);
859  atomic_jac_row.optimize();
860 
861  atomic_jac_row.set_inv_positions();
862 
863  atomic_jac_row = atomic_jac_row.atomic();
864 
865  replay.clear_deriv_sub();
866  Rcout << "done\n";
867 
868  TMBAD_ASSERT(atomic_jac_row.Domain() ==
869  this->Domain() + this->Range());
870  TMBAD_ASSERT(atomic_jac_row.Range() == keep_x_count);
871  }
     // Argument: the inputs followed by a unit weight on output k.
872  std::vector<Replay> vec(atomic_jac_row.Domain(), Replay(0));
873  for (size_t i = 0; i < this->Domain(); i++) {
874  vec[i] = replay.value_inv(i);
875  }
876  vec[this->Domain() + k] = 1.;
877  std::vector<Replay> r = atomic_jac_row(vec);
878  size_t r_idx = 0;
879  for (size_t i = 0; i < this->Domain(); i++) {
880  if (keep_x[i]) replay.deriv_inv(i) = r[r_idx++];
881  }
882  }
883  for (size_t l = 0; l < col_idx.size(); l++) {
884  replay.deriv_inv(col_idx[l]).Dependent();
885  }
886  }
887  replay.stop();
888  if (config.index_remap) {
     // Renumber indices to positions within the kept subset.
889  if (keep_x.size() > 0) {
890  std::vector<Index> remap_j = cumsum0<Index>(keep_x);
891  ans.j = TMBad::subset(remap_j, ans.j);
892  ans.n = keep_x_count;
893  }
894  if (keep_y.size() > 0) {
895  std::vector<Index> remap_i = cumsum0<Index>(keep_y);
896  ans.i = TMBad::subset(remap_i, ans.i);
897  ans.m = keep_y_count;
898  }
899  }
900  set_inner_outer(ans);
901  return ans;
902  }
/** \brief Integrate out the `random` inputs using `integrate_subgraph::gk`.

    The tape is temporarily aggregated (`aggregate(glob, -1)`) to obtain
    the accumulation-tree split; `old_state::restore()` is then called on
    the state captured beforehand.

    \param random Indices of the inputs to integrate out.
    \param cfg Configuration forwarded to `integrate_subgraph`.
    \return New tape built from `i_s.gk()`, passed through
    `aggregate(ans.glob, -1)`.
*/
907  ADFun marginal_gk(const std::vector<Index> &random,
908  gk_config cfg = gk_config()) {
909  ADFun ans;
910  old_state os(this->glob);
911  aggregate(this->glob, -1);
912  global glob_split = accumulation_tree_split(this->glob);
913  os.restore();
914  integrate_subgraph<ADFun> i_s(glob_split, random, cfg);
915  ans.glob = i_s.gk();
916  aggregate(ans.glob, -1);
917  return ans;
918  }
/** \brief Integrate out the `random` inputs using `sequential_reduction`.

    Same tape preparation as `marginal_gk`.

    \param random Indices of the inputs to integrate out.
    \param grid Grids forwarded to `sequential_reduction`.
    \param random2grid Forwarded to `sequential_reduction`; presumably
    assigns a grid to each random input — confirm.
    \param perm Forwarded to `sequential_reduction`.
    \return New tape built from `SR.marginal()`, passed through
    `aggregate(ans.glob, -1)`.
*/
920  ADFun marginal_sr(const std::vector<Index> &random, std::vector<sr_grid> grid,
921  const std::vector<Index> &random2grid, bool perm = true) {
922  ADFun ans;
923  old_state os(this->glob);
924  aggregate(this->glob, -1);
925  global glob_split = accumulation_tree_split(this->glob);
926  os.restore();
927  sequential_reduction SR(glob_split, random, grid, random2grid, perm);
928  ans.glob = SR.marginal();
929  aggregate(ans.glob, -1);
930  return ans;
931  }
/** \brief Convenience overload of `marginal_sr` using a single grid.

    Calls the general overload with a one-element grid vector and an
    empty `random2grid`.
*/
933  ADFun marginal_sr(const std::vector<Index> &random,
934  sr_grid grid = sr_grid()) {
935  return marginal_sr(random, std::vector<sr_grid>(1, grid),
936  std::vector<Index>(0));
937  }
/** \brief Construct the composition `x -> (*this)(other(x))` as a new tape.

    The new tape is recorded by evaluating `other` followed by `*this` on
    taped inputs, starting from the point `other.DomainVec()`.

    \param other Inner function. `other.Range()` must equal `this->Domain()`
    (checked by `TMBAD_ASSERT2`).
    \return New `ADFun` with as many inputs as `other`.
*/
942  ADFun compose(ADFun other) {
943  TMBAD_ASSERT2(other.Range() == this->Domain(),
944  "Composition of incompatible functions");
     // Local functor: f is the outer function (*this), g the inner (other).
945  struct composition {
946  const ADFun &f;
947  const ADFun &g;
948  composition(const ADFun &f, const ADFun &g) : f(f), g(g) {}
949  std::vector<ad> operator()(std::vector<ad> x) { return f(g(x)); }
950  };
951  composition fg(*this, other);
952  return ADFun(fg, other.DomainVec());
953  }
/** \brief Split this tape in two at a given set of operator nodes.

    - `ans.first` is a copy of this tape whose dependent variables are
      replaced by the output variables of `nodes` (`glob1.op2var(nodes)`).
    - `ans.second` is a copy of this tape after `substitute(glob2, nodes)`.

    Nodes whose operator is `InvOp` are dropped before splitting.
    `eliminate()` is run on both tapes, and the inner/outer parameter
    split of this object is propagated through `set_inner_outer`.

    \param nodes Operator indices (into `glob.opstack`) at which to split.
    \return The pair of tapes as a `Decomp2`.
*/
958  Decomp2<ADFun> decompose(std::vector<Index> nodes) {
959  Decomp2<ADFun> ans;
960  global &glob1 = ans.first.glob;
961  global &glob2 = ans.second.glob;
962 
     // Filter out nodes that are independent-variable (InvOp) operators.
963  OperatorPure *invop = glob.getOperator<global::InvOp>();
964  std::vector<bool> keep(nodes.size(), true);
965  for (size_t i = 0; i < nodes.size(); i++)
966  if (glob.opstack[nodes[i]] == invop) keep[i] = false;
967  nodes = subset(nodes, keep);
968 
     // First tape: same graph, but the outputs are the variables of 'nodes'.
969  glob1 = this->glob;
970  glob1.dep_index.resize(0);
971  std::vector<Index> dep1 = glob1.op2var(nodes);
972  glob1.ad_start();
973  for (size_t i = 0; i < dep1.size(); i++) {
974  ad_plain tmp;
975  tmp.index = dep1[i];
976  tmp.Dependent();
977  }
978  glob1.ad_stop();
979  glob1.eliminate();
980 
     // Second tape: original graph with 'nodes' substituted.
981  glob2 = this->glob;
982  substitute(glob2, nodes);
983  glob2.eliminate();
984 
985  set_inner_outer(ans.first);
986  set_inner_outer(ans.second);
987 
988  return ans;
989  }
994  Decomp2<ADFun> decompose(const char *name) {
995  std::vector<Index> nodes = find_op_by_name(this->glob, name);
996  return decompose(nodes);
997  }
1003  if (find_op_by_name(glob, "RefOp").size() == 0) return;
1004 
1005  std::vector<bool> keep_x(Domain(), true);
1006  std::vector<bool> keep_y(Range(), true);
1007  std::vector<bool> vars = get_keep_var(keep_x, keep_y);
1008 
1009  vars = reverse_boundary(glob, vars);
1010 
1011  std::vector<Index> nodes = which<Index>(glob.var2op(vars));
1012 
1013  Decomp2<ADFun> decomp = decompose(nodes);
1014 
1015  size_t n_inner = decomp.first.Domain();
1016  size_t n_outer = decomp.first.Range();
1017 
1018  decomp.first.glob.inv_index.resize(0);
1019 
1020  std::vector<ad_aug> empty;
1021  std::vector<ad_aug> gx = decomp.first(empty);
1022 
1023  ADFun &f = decomp.second;
1024 
1025  f.replay();
1026 
1027  TMBAD_ASSERT(n_inner + n_outer == f.Domain());
1028  TMBAD_ASSERT(find_op_by_name(f.glob, "RefOp").size() == 0);
1029  TMBAD_ASSERT(find_op_by_name(f.glob, "InvOp").size() == f.Domain());
1030  TMBAD_ASSERT(gx.size() == n_outer);
1031 
1032  for (size_t i = 0; i < n_outer; i++) {
1033  Index j = f.glob.inv_index[n_inner + i];
1034 
1035  if (gx[i].constant()) {
1036  f.glob.opstack[j] = glob.getOperator<global::ConstOp>();
1037  } else {
1038  f.glob.opstack[j] = glob.getOperator<global::RefOp>(
1039  gx[i].data.glob, gx[i].taped_value.index);
1040  }
1041  }
1042  f.glob.inv_index.resize(n_inner);
1043 
1044  *this = f;
1045  }
1055  std::vector<ad_aug> resolve_refs() {
1056  TMBAD_ASSERT2(
1057  inner_inv_index.size() == 0 && outer_inv_index.size() == 0,
1058  "'resolve_refs' can only be run once for a given function object")
1059 
1060  ;
1061  std::vector<Index> seq = find_op_by_name(glob, "RefOp");
1062  std::vector<Replay> values(seq.size());
1063  std::vector<Index> dummy_inputs;
1064  ForwardArgs<Replay> args(dummy_inputs, values);
1065  for (size_t i = 0; i < seq.size(); i++) {
1066  TMBAD_ASSERT(glob.opstack[seq[i]]->input_size() == 0);
1067  TMBAD_ASSERT(glob.opstack[seq[i]]->output_size() == 1);
1068  glob.opstack[seq[i]]->forward_incr(args);
1069  glob.opstack[seq[i]]->deallocate();
1070  glob.opstack[seq[i]] = get_glob()->getOperator<global::InvOp>();
1071  }
1072  inner_inv_index = glob.inv_index;
1073  outer_inv_index = glob.op2var(seq);
1074 
1075  glob.inv_index.insert(glob.inv_index.end(), outer_inv_index.begin(),
1076  outer_inv_index.end());
1077  return values;
1078  }
1079  std::vector<Index> inner_inv_index;
1080  std::vector<Index> outer_inv_index;
1082  size_t DomainInner() const { return inner_inv_index.size(); }
1084  size_t DomainOuter() const { return outer_inv_index.size(); }
1088  void SwapInner() {
1089  std::swap(glob.inv_index, inner_inv_index);
1090  force_update();
1091  }
1095  void SwapOuter() {
1096  std::swap(glob.inv_index, outer_inv_index);
1097  force_update();
1098  }
1101  return (DomainInner() > 0) || (DomainOuter() > 0);
1102  }
1104  std::vector<bool> DomainOuterMask() {
1105  std::vector<bool> mark_outer =
1106  glob.mark_space(glob.values.size(), outer_inv_index);
1107  return subset(mark_outer, glob.inv_index);
1108  }
1116  void set_inner_outer(ADFun &ans, const std::vector<bool> &outer_mask) {
1117  if (inner_outer_in_use()) {
1118  std::vector<bool> mark(outer_mask);
1119  mark.resize(ans.Domain(), false);
1120 
1121  ans.outer_inv_index = subset(ans.glob.inv_index, mark);
1122 
1123  mark.flip();
1124 
1125  ans.inner_inv_index = subset(ans.glob.inv_index, mark);
1126  }
1127  }
1128  void set_inner_outer(ADFun &ans) {
1129  if (inner_outer_in_use()) {
1130  set_inner_outer(ans, DomainOuterMask());
1131  }
1132  }
1133  void DomainReduce(const std::vector<bool> &inv_keep) {
1134  std::vector<bool> outer_mask = DomainOuterMask();
1135  outer_mask = subset(outer_mask, inv_keep);
1136  glob.inv_index = subset(glob.inv_index, inv_keep);
1137  set_inner_outer(*this, outer_mask);
1138  }
1144  void inactivate(std::vector<Index> nodes) {
1145  for (size_t i = 0; i < nodes.size(); i++) {
1146  OperatorPure *op = glob.opstack[nodes[i]];
1147  glob.opstack[nodes[i]] = glob.getOperator<global::NullOp2>(
1148  op->input_size(), op->output_size());
1149  op->deallocate();
1150  }
1151  }
1152 };
1164 template <class Functor, class Test = ParametersChanged>
1165 ADFun<> ADFun_retaping(Functor &F, const std::vector<ad_aug> &x,
1166  Test test = Test()) {
1167  typedef retaping_derivative_table<Functor, ADFun<>, Test> DTab;
1168  global::Complete<AtomOp<DTab> > Op(F, x, test);
1169  return ADFun<>(Op, x);
1170 }
1171 
1173 template <class dummy = void>
1175  ADFun<> Fp;
1176  ADFun_packed(const ADFun<> &Fp) : Fp(Fp) {}
1177  ADFun_packed() {}
1178  ad_segment operator()(const std::vector<ad_segment> &x) {
1179  std::vector<ad_segment> xp(x.size());
1180  for (size_t i = 0; i < xp.size(); i++) xp[i] = pack(x[i]);
1181  std::vector<ad_aug> yp = Fp(concat(xp));
1182  return unpack(yp, 0);
1183  }
1184  bool initialized() { return Fp.Domain() != 0; }
1185 };
1193 template <class Functor, class Test>
1194 ADFun_packed<> ADFun_retaping(Functor &F, const std::vector<ad_segment> &x,
1195  Test test) {
1196  static const bool packed = true;
1198  packed>
1199  DTab;
1200  PackWrap<Functor> Fp(F);
1201  std::vector<ad_segment> xp(x.size());
1202  for (size_t i = 0; i < xp.size(); i++) xp[i] = pack(x[i]);
1203  std::vector<ad_aug> xp_ = concat(xp);
1204  PackWrap<Test> testp(test);
1205  global::Complete<AtomOp<DTab> > Op(Fp, xp_, testp);
1206  ADFun<> TapeFp(Op, xp_);
1207  return ADFun_packed<>(TapeFp);
1208 }
1209 
1210 template <class ADFun>
1211 struct Sparse : ADFun {
1212  std::vector<Index> i;
1213  std::vector<Index> j;
1214  Index m;
1215  Index n;
1216  Sparse() {}
1217  Sparse(const ADFun &f) : ADFun(f) {}
1218  std::vector<Index> a2v(const std::valarray<Index> &x) const {
1219  return std::vector<Index>(&x[0], &x[0] + x.size());
1220  }
1221  std::valarray<Index> v2a(const std::vector<Index> &x) const {
1222  return std::valarray<Index>(x.data(), x.size());
1223  }
1224  std::valarray<Index> row() const { return v2a(i); }
1225  std::valarray<Index> col() const { return v2a(j); }
1226  void subset_inplace(const std::valarray<bool> &x) {
1227  i = a2v(row()[x]);
1228  j = a2v(col()[x]);
1229  this->glob.dep_index = a2v(v2a(this->glob.dep_index)[x]);
1230  }
1231  void transpose_inplace() {
1232  std::swap(i, j);
1233  std::swap(m, n);
1234  }
1235 };
1236 
1243 template <class ADFun>
1244 struct Decomp2 : std::pair<ADFun, ADFun> {
1245  struct composition {
1246  typedef ad_aug ad;
1247  const ADFun &f;
1248  const ADFun &g;
1249  composition(const ADFun &f, const ADFun &g) : f(f), g(g) {}
1250  std::vector<ad> operator()(std::vector<ad> x) {
1251  std::vector<ad> y = g(x);
1252  x.insert(x.end(), y.begin(), y.end());
1253  return f(x);
1254  }
1255  };
1256  operator ADFun() {
1257  ADFun &g = this->first;
1258  ADFun &f = this->second;
1259  composition fg(f, g);
1260  return ADFun(fg, g.DomainVec());
1261  }
1285  Decomp3<ADFun> HesFun(std::vector<bool> keep_rc = std::vector<bool>(0),
1286  bool sparse_1 = true, bool sparse_2 = true,
1287  bool sparse_3 = true) {
1288  ADFun &g = this->first;
1289  ADFun &f = this->second;
1290  Decomp3<ADFun> ans;
1291  TMBAD_ASSERT(f.Range() == 1);
1292 
1293  std::vector<bool> keep_f = std::vector<bool>(f.Range(), true);
1294  std::vector<bool> keep_g = std::vector<bool>(g.Range(), true);
1295 
1296  typedef ad_aug ad;
1297  global &glob = ans.first.glob;
1298  glob.ad_start();
1299  std::vector<Scalar> x_ = f.DomainVec();
1300  size_t k = g.Range();
1301  size_t n = f.Domain() - k;
1302 
1303  std::vector<bool> mask_x(f.Domain(), false);
1304  for (size_t i = 0; i < n; i++) mask_x[i] = true;
1305  std::vector<bool> mask_s(mask_x);
1306  mask_s.flip();
1307 
1308  std::vector<ad> x(x_.begin(), x_.end() - k);
1309  Independent(x);
1310  std::vector<ad> s = g(x);
1311  std::vector<ad> s0(s.size());
1312 
1313  for (size_t i = 0; i < s.size(); i++) s0[i] = s[i].copy0();
1314  std::vector<ad> xs(x);
1315  xs.insert(xs.end(), s.begin(), s.end());
1316  std::vector<ad> xs0(x);
1317  xs0.insert(xs0.end(), s0.begin(), s0.end());
1318  if (false) {
1319  TMBAD_ASSERT(keep_rc.size() == n || keep_rc.size() == 0);
1320  std::vector<bool> keep_xy(keep_rc);
1321  keep_xy.resize(f.Domain(), true);
1322  ADFun f_grad = f.JacFun(keep_xy, keep_f);
1323  }
1324  ADFun f_grad = f.JacFun();
1325  std::vector<ad> z = subset(f_grad(xs), mask_x);
1326  std::vector<ad> z0 = subset(f_grad(xs0), mask_s);
1327  std::vector<ad> xw(x);
1328  xw.insert(xw.end(), z0.begin(), z0.end());
1329  std::vector<ad> z1 = g.WgtJacFun()(xw);
1330  for (size_t i = 0; i < n; i++) z[i] += z1[i];
1331  Dependent(z);
1332  glob.ad_stop();
1333  glob.eliminate();
1334  ans.first.glob = glob;
1335 
1336  if (sparse_1) {
1337  ans.first = ans.first.SpJacFun(keep_rc, keep_rc);
1338  } else {
1339  ans.first = ans.first.JacFun(keep_rc, keep_rc);
1340  }
1341  ans.first.glob.eliminate();
1342  f.set_inner_outer(ans.first);
1343 
1344  if (sparse_2) {
1345  ans.second = g.SpJacFun(keep_rc);
1346  } else {
1347  ans.second = g.JacFun(keep_rc);
1348  }
1349  ans.second.glob.eliminate();
1350 
1351  Sparse<ADFun> B;
1352  if (sparse_3) {
1353  B = f_grad.SpJacFun(mask_s, mask_s);
1354  } else {
1355  B = f_grad.JacFun(mask_s, mask_s);
1356  }
1357  ans.third.glob.ad_start();
1358  std::vector<ad> xx(x_.begin(), x_.end() - k);
1359  Independent(xx);
1360  s = g(xx);
1361  xs = xx;
1362  xs.insert(xs.end(), s.begin(), s.end());
1363  z = B(xs);
1364  Dependent(z);
1365  ans.third.glob.ad_stop();
1366  ans.third.glob.eliminate();
1367  ans.third.i = B.i;
1368  ans.third.j = B.j;
1369  f.set_inner_outer(ans.third);
1370 
1371  return ans;
1372  }
1373 };
1374 
1384 template <class ADFun>
1385 struct Decomp3 : Decomp2<Sparse<ADFun> > {
1386  Sparse<ADFun> third;
1387 };
1388 
1389 } // namespace TMBad
1390 #endif // HAVE_TMBAD_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
std::vector< bool > get_keep_var(std::vector< bool > keep_x, std::vector< bool > keep_y)
Get necessary variables to keep for given input/output selection.
Definition: TMBad.hpp:280
std::vector< Index > op2var(const std::vector< Index > &seq)
Get variables produced by a node sequence.
Definition: TMBad.cpp:1435
std::vector< T > subset(const std::vector< T > &x, const std::vector< bool > &y)
Vector subset by boolean mask.
diff --git a/ad__blas_8hpp_source.html b/ad__blas_8hpp_source.html index b90ba2ad8..2d493beda 100644 --- a/ad__blas_8hpp_source.html +++ b/ad__blas_8hpp_source.html @@ -73,7 +73,7 @@
ad_blas.hpp
-
1 #ifndef HAVE_AD_BLAS_HPP
2 #define HAVE_AD_BLAS_HPP
3 // Autogenerated - do not edit by hand !
4 #include <Eigen/Dense>
5 #include "global.hpp"
6 
7 namespace TMBad {
8 
22 template <class Matrix>
24  bool yes = true;
25  Index j_previous = -1;
26  for (size_t i = 0; i < (size_t)x.size(); i++) {
27  if (!x(i).on_some_tape()) {
28  yes = false;
29  break;
30  }
31  Index j = ad_plain(x(i)).index;
32  if (i > 0) {
33  if (j != j_previous + 1) {
34  yes = false;
35  break;
36  }
37  }
38  j_previous = j;
39  }
40  if (yes) {
41  return global::ad_segment(ad_plain(x(0)), x.rows(), x.cols());
42  }
43 
44  ad_plain ans;
45  for (size_t i = 0; i < (size_t)x.size(); i++) {
46  ad_plain xi_cpy = x(i).copy();
47 
48  x(i).override_by(xi_cpy);
49  if (i == 0) ans = xi_cpy;
50  }
51  return global::ad_segment(ans, x.rows(), x.cols());
52 }
53 
54 using Eigen::Dynamic;
55 using Eigen::Map;
56 using Eigen::Matrix;
57 typedef Matrix<double, Dynamic, Dynamic> dmatrix;
58 typedef Matrix<global::Replay, Dynamic, Dynamic> vmatrix;
59 
60 template <class Target>
61 void fill(Target &y, const global::ad_segment x) {
62  TMBAD_ASSERT((size_t)y.size() == (size_t)x.size());
63  for (size_t i = 0; i < (size_t)y.size(); i++) {
64  y(i) = x[i];
65  }
66 }
67 
68 template <bool XT, bool YT, bool ZT, bool UP>
69 struct MatMul;
70 template <bool XT, bool YT, bool ZT, bool UP>
71 void matmul(const vmatrix &x, const vmatrix &y, Map<vmatrix> z) {
74  if (!UP) {
75  global::ad_segment out =
76  get_glob()->add_to_stack<MatMul<XT, YT, ZT, UP> >(xc, yc);
77  fill(z, out);
78  } else {
80  get_glob()->add_to_stack<MatMul<XT, YT, ZT, UP> >(xc, yc, zc);
81  }
82 }
83 
85 vmatrix matmul(const vmatrix &x, const vmatrix &y);
86 
88 dmatrix matmul(const dmatrix &x, const dmatrix &y);
89 
91 template <bool XT, bool YT, bool ZT, bool UP>
92 void matmul(Map<const dmatrix> x, Map<const dmatrix> y, Map<dmatrix> z) {
93  if (!UP) {
94  if (XT && YT && ZT) z.transpose() = x.transpose() * y.transpose();
95  if (!XT && YT && ZT) z.transpose() = x * y.transpose();
96  if (XT && !YT && ZT) z.transpose() = x.transpose() * y;
97  if (XT && YT && !ZT) z = x.transpose() * y.transpose();
98  if (!XT && !YT && ZT) z.transpose() = x * y;
99  if (XT && !YT && !ZT) z = x.transpose() * y;
100  if (!XT && YT && !ZT) z = x * y.transpose();
101  if (!XT && !YT && !ZT) z = x * y;
102  }
103  if (UP) {
104  if (XT && YT && ZT) z.transpose() += x.transpose() * y.transpose();
105  if (!XT && YT && ZT) z.transpose() += x * y.transpose();
106  if (XT && !YT && ZT) z.transpose() += x.transpose() * y;
107  if (XT && YT && !ZT) z += x.transpose() * y.transpose();
108  if (!XT && !YT && ZT) z.transpose() += x * y;
109  if (XT && !YT && !ZT) z += x.transpose() * y;
110  if (!XT && YT && !ZT) z += x * y.transpose();
111  if (!XT && !YT && !ZT) z += x * y;
112  }
113 }
114 
115 template <bool XT, bool YT, bool ZT, bool UP>
116 struct MatMul : global::Operator<2 + UP, -1> {
117  static const bool dynamic = true;
118  static const int max_fuse_depth = 0;
119  int n1, n2, n3;
120  static const int ninput = 2 + UP;
122  set_dim(X.rows(), X.cols(), Y.rows(), Y.cols());
123  }
124  MatMul(int n1, int n2, int n3) : n1(n1), n2(n2), n3(n3) {}
125  Index input_size() const { return 2 + UP; }
126  Index output_size() const {
127  if (UP) return 0;
128  int Xrows, Xcols, Yrows, Ycols, Zrows, Zcols;
129  get_dim(Xrows, Xcols, Yrows, Ycols, Zrows, Zcols);
130  return Zrows * Zcols;
131  }
132  static const bool have_input_size_output_size = true;
133  void set_dim(int Xrows, int Xcols, int Yrows, int Ycols) {
134  n1 = Xrows;
135  n2 = Xcols;
136  n3 = (YT ? Yrows : Ycols);
137  }
138  void get_dim(int &Xrows, int &Xcols, int &Yrows, int &Ycols, int &Zrows,
139  int &Zcols) const {
140  Xrows = n1;
141  Xcols = n2;
142 
143  int Xop_rows = Xrows, Xop_cols = Xcols;
144  if (XT) std::swap(Xop_rows, Xop_cols);
145 
146  int Yop_rows = Xop_cols, Yop_cols = n3;
147 
148  Yrows = Yop_rows;
149  Ycols = Yop_cols;
150  if (YT) std::swap(Yrows, Ycols);
151 
152  int Zop_rows = Xop_rows, Zop_cols = Yop_cols;
153 
154  Zrows = Zop_rows;
155  Zcols = Zop_cols;
156  if (ZT) std::swap(Zrows, Zcols);
157  }
158  template <class Type>
159  void forward(ForwardArgs<Type> &args) {
160  int Xrows, Xcols, Yrows, Ycols, Zrows, Zcols;
161  get_dim(Xrows, Xcols, Yrows, Ycols, Zrows, Zcols);
162  typedef Map<Matrix<Type, Dynamic, Dynamic> > MapMatrix;
163  typedef Map<const Matrix<Type, Dynamic, Dynamic> > ConstMapMatrix;
164  Type *zp = (UP ? args.x_ptr(2) : args.y_ptr(0));
165  ConstMapMatrix X(args.x_ptr(0), Xrows, Xcols);
166  ConstMapMatrix Y(args.x_ptr(1), Yrows, Ycols);
167  MapMatrix Z(zp, Zrows, Zcols);
168  matmul<XT, YT, ZT, UP>(X, Y, Z);
169  }
170  template <class Type>
171  void reverse(ReverseArgs<Type> &args) {
172  int Xrows, Xcols, Yrows, Ycols, Zrows, Zcols;
173  get_dim(Xrows, Xcols, Yrows, Ycols, Zrows, Zcols);
174  typedef Map<Matrix<Type, Dynamic, Dynamic> > MapMatrix;
175  typedef Map<const Matrix<Type, Dynamic, Dynamic> > ConstMapMatrix;
176  Type *dzp = (UP ? args.dx_ptr(2) : args.dy_ptr(0));
177  ConstMapMatrix X(args.x_ptr(0), Xrows, Xcols);
178  ConstMapMatrix Y(args.x_ptr(1), Yrows, Ycols);
179  ConstMapMatrix W(dzp, Zrows, Zcols);
180  MapMatrix DX(args.dx_ptr(0), Xrows, Xcols);
181  MapMatrix DY(args.dx_ptr(1), Yrows, Ycols);
182 
183  matmul<ZT, !YT, XT, true>(W, Y, DX);
184  matmul<!XT, ZT, YT, true>(X, W, DY);
185  }
186 
187  void dependencies(Args<> &args, Dependencies &dep) const {
188  int Xrows, Xcols, Yrows, Ycols, Zrows, Zcols;
189  get_dim(Xrows, Xcols, Yrows, Ycols, Zrows, Zcols);
190  dep.add_segment(args.input(0), Xrows * Xcols);
191  dep.add_segment(args.input(1), Yrows * Ycols);
192  }
193 
194  void dependencies_updating(Args<> &args, Dependencies &dep) const {
195  int Xrows, Xcols, Yrows, Ycols, Zrows, Zcols;
196  get_dim(Xrows, Xcols, Yrows, Ycols, Zrows, Zcols);
197  if (UP) {
198  dep.add_segment(args.input(2), Zrows * Zcols);
199  }
200  }
201  static const bool have_dependencies = true;
203  static const bool implicit_dependencies = true;
205  static const bool allow_remap = false;
207  static const bool updating = true;
208 
209  void forward(ForwardArgs<Writer> &args) { TMBAD_ASSERT(false); }
210  void reverse(ReverseArgs<Writer> &args) { TMBAD_ASSERT(false); }
211  const char *op_name() { return "MatMul"; }
212 };
213 
214 } // namespace TMBad
215 #endif // HAVE_AD_BLAS_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+
1 #ifndef HAVE_AD_BLAS_HPP
2 #define HAVE_AD_BLAS_HPP
3 // Autogenerated - do not edit by hand !
4 #include <Eigen/Dense>
5 #include "global.hpp"
6 
7 namespace TMBad {
8 
22 template <class Matrix>
24  bool yes = true;
25  Index j_previous = -1;
26  for (size_t i = 0; i < (size_t)x.size(); i++) {
27  if (!x(i).on_some_tape()) {
28  yes = false;
29  break;
30  }
31  Index j = ad_plain(x(i)).index;
32  if (i > 0) {
33  if (j != j_previous + 1) {
34  yes = false;
35  break;
36  }
37  }
38  j_previous = j;
39  }
40  if (yes) {
41  return global::ad_segment(ad_plain(x(0)), x.rows(), x.cols());
42  }
43 
44  ad_plain ans;
45  for (size_t i = 0; i < (size_t)x.size(); i++) {
46  ad_plain xi_cpy = x(i).copy();
47 
48  x(i).override_by(xi_cpy);
49  if (i == 0) ans = xi_cpy;
50  }
51  return global::ad_segment(ans, x.rows(), x.cols());
52 }
53 
54 using Eigen::Dynamic;
55 using Eigen::Map;
56 using Eigen::Matrix;
57 typedef Matrix<double, Dynamic, Dynamic> dmatrix;
58 typedef Matrix<global::Replay, Dynamic, Dynamic> vmatrix;
59 
60 template <class Target>
61 void fill(Target &y, const global::ad_segment x) {
62  TMBAD_ASSERT((size_t)y.size() == (size_t)x.size());
63  for (size_t i = 0; i < (size_t)y.size(); i++) {
64  y(i) = x[i];
65  }
66 }
67 
68 template <bool XT, bool YT, bool ZT, bool UP>
69 struct MatMul;
70 template <bool XT, bool YT, bool ZT, bool UP>
71 void matmul(const vmatrix &x, const vmatrix &y, Map<vmatrix> z) {
74  if (!UP) {
75  global::ad_segment out =
76  get_glob()->add_to_stack<MatMul<XT, YT, ZT, UP> >(xc, yc);
77  fill(z, out);
78  } else {
80  get_glob()->add_to_stack<MatMul<XT, YT, ZT, UP> >(xc, yc, zc);
81  }
82 }
83 
85 vmatrix matmul(const vmatrix &x, const vmatrix &y);
86 
88 dmatrix matmul(const dmatrix &x, const dmatrix &y);
89 
91 template <bool XT, bool YT, bool ZT, bool UP>
92 void matmul(Map<const dmatrix> x, Map<const dmatrix> y, Map<dmatrix> z) {
93  if (!UP) {
94  if (XT && YT && ZT) z.transpose() = x.transpose() * y.transpose();
95  if (!XT && YT && ZT) z.transpose() = x * y.transpose();
96  if (XT && !YT && ZT) z.transpose() = x.transpose() * y;
97  if (XT && YT && !ZT) z = x.transpose() * y.transpose();
98  if (!XT && !YT && ZT) z.transpose() = x * y;
99  if (XT && !YT && !ZT) z = x.transpose() * y;
100  if (!XT && YT && !ZT) z = x * y.transpose();
101  if (!XT && !YT && !ZT) z = x * y;
102  }
103  if (UP) {
104  if (XT && YT && ZT) z.transpose() += x.transpose() * y.transpose();
105  if (!XT && YT && ZT) z.transpose() += x * y.transpose();
106  if (XT && !YT && ZT) z.transpose() += x.transpose() * y;
107  if (XT && YT && !ZT) z += x.transpose() * y.transpose();
108  if (!XT && !YT && ZT) z.transpose() += x * y;
109  if (XT && !YT && !ZT) z += x.transpose() * y;
110  if (!XT && YT && !ZT) z += x * y.transpose();
111  if (!XT && !YT && !ZT) z += x * y;
112  }
113 }
114 
115 template <bool XT, bool YT, bool ZT, bool UP>
116 struct MatMul : global::Operator<2 + UP, -1> {
117  static const bool dynamic = true;
118  static const int max_fuse_depth = 0;
119  int n1, n2, n3;
120  static const int ninput = 2 + UP;
122  set_dim(X.rows(), X.cols(), Y.rows(), Y.cols());
123  }
124  MatMul(int n1, int n2, int n3) : n1(n1), n2(n2), n3(n3) {}
125  Index input_size() const { return 2 + UP; }
126  Index output_size() const {
127  if (UP) return 0;
128  int Xrows, Xcols, Yrows, Ycols, Zrows, Zcols;
129  get_dim(Xrows, Xcols, Yrows, Ycols, Zrows, Zcols);
130  return Zrows * Zcols;
131  }
132  static const bool have_input_size_output_size = true;
133  void set_dim(int Xrows, int Xcols, int Yrows, int Ycols) {
134  n1 = Xrows;
135  n2 = Xcols;
136  n3 = (YT ? Yrows : Ycols);
137  }
138  void get_dim(int &Xrows, int &Xcols, int &Yrows, int &Ycols, int &Zrows,
139  int &Zcols) const {
140  Xrows = n1;
141  Xcols = n2;
142 
143  int Xop_rows = Xrows, Xop_cols = Xcols;
144  if (XT) std::swap(Xop_rows, Xop_cols);
145 
146  int Yop_rows = Xop_cols, Yop_cols = n3;
147 
148  Yrows = Yop_rows;
149  Ycols = Yop_cols;
150  if (YT) std::swap(Yrows, Ycols);
151 
152  int Zop_rows = Xop_rows, Zop_cols = Yop_cols;
153 
154  Zrows = Zop_rows;
155  Zcols = Zop_cols;
156  if (ZT) std::swap(Zrows, Zcols);
157  }
158  template <class Type>
159  void forward(ForwardArgs<Type> &args) {
160  int Xrows, Xcols, Yrows, Ycols, Zrows, Zcols;
161  get_dim(Xrows, Xcols, Yrows, Ycols, Zrows, Zcols);
162  typedef Map<Matrix<Type, Dynamic, Dynamic> > MapMatrix;
163  typedef Map<const Matrix<Type, Dynamic, Dynamic> > ConstMapMatrix;
164  Type *zp = (UP ? args.x_ptr(2) : args.y_ptr(0));
165  ConstMapMatrix X(args.x_ptr(0), Xrows, Xcols);
166  ConstMapMatrix Y(args.x_ptr(1), Yrows, Ycols);
167  MapMatrix Z(zp, Zrows, Zcols);
168  matmul<XT, YT, ZT, UP>(X, Y, Z);
169  }
170  template <class Type>
171  void reverse(ReverseArgs<Type> &args) {
172  int Xrows, Xcols, Yrows, Ycols, Zrows, Zcols;
173  get_dim(Xrows, Xcols, Yrows, Ycols, Zrows, Zcols);
174  typedef Map<Matrix<Type, Dynamic, Dynamic> > MapMatrix;
175  typedef Map<const Matrix<Type, Dynamic, Dynamic> > ConstMapMatrix;
176  Type *dzp = (UP ? args.dx_ptr(2) : args.dy_ptr(0));
177  ConstMapMatrix X(args.x_ptr(0), Xrows, Xcols);
178  ConstMapMatrix Y(args.x_ptr(1), Yrows, Ycols);
179  ConstMapMatrix W(dzp, Zrows, Zcols);
180  MapMatrix DX(args.dx_ptr(0), Xrows, Xcols);
181  MapMatrix DY(args.dx_ptr(1), Yrows, Ycols);
182 
183  matmul<ZT, !YT, XT, true>(W, Y, DX);
184  matmul<!XT, ZT, YT, true>(X, W, DY);
185  }
186 
187  void dependencies(Args<> &args, Dependencies &dep) const {
188  int Xrows, Xcols, Yrows, Ycols, Zrows, Zcols;
189  get_dim(Xrows, Xcols, Yrows, Ycols, Zrows, Zcols);
190  dep.add_segment(args.input(0), Xrows * Xcols);
191  dep.add_segment(args.input(1), Yrows * Ycols);
192  }
193 
194  void dependencies_updating(Args<> &args, Dependencies &dep) const {
195  int Xrows, Xcols, Yrows, Ycols, Zrows, Zcols;
196  get_dim(Xrows, Xcols, Yrows, Ycols, Zrows, Zcols);
197  if (UP) {
198  dep.add_segment(args.input(2), Zrows * Zcols);
199  }
200  }
201  static const bool have_dependencies = true;
203  static const bool implicit_dependencies = true;
205  static const bool allow_remap = false;
207  static const bool updating = true;
208 
209  void forward(ForwardArgs<Writer> &args) { TMBAD_ASSERT(false); }
210  void reverse(ReverseArgs<Writer> &args) { TMBAD_ASSERT(false); }
211  const char *op_name() { return "MatMul"; }
212 };
213 
214 } // namespace TMBad
215 #endif // HAVE_AD_BLAS_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
Operator with input/output dimension known at compile time.
Definition: global.hpp:1491
global * get_glob()
Get pointer to current global AD context (or NULL if no context is active).
Definition: TMBad.cpp:690
ad_plain add_to_stack(Scalar result=0)
Add nullary operator to the stack based on its result
Definition: global.hpp:2448
diff --git a/atomic__macro_8hpp_source.html b/atomic__macro_8hpp_source.html index c1af2914f..c7495b450 100644 --- a/atomic__macro_8hpp_source.html +++ b/atomic__macro_8hpp_source.html @@ -73,8 +73,8 @@
atomic_macro.hpp
-
1 // Copyright (C) 2013-2015 Kasper Kristensen
2 // License: GPL-2
3 
4 /* Flag to detect if any atomic functions have been created */
5 TMB_EXTERN bool atomicFunctionGenerated CSKIP(= false;)
6 
8 #define TMB_ATOMIC_VECTOR_FUNCTION(ATOMIC_NAME, OUTPUT_DIM, ATOMIC_DOUBLE, \
9  ATOMIC_REVERSE) \
10  \
11  template<class Double> \
12  void ATOMIC_NAME(const CppAD::vector<Double>& tx, \
13  CppAD::vector<Double>& ty) CSKIP({ \
14  ATOMIC_DOUBLE; \
15  }) \
16  template<class Double> \
17  CppAD::vector<double> ATOMIC_NAME(const CppAD::vector<Double>& tx) CSKIP({ \
18  CppAD::vector<double> ty(OUTPUT_DIM); \
19  ATOMIC_NAME(tx, ty); \
20  return ty; \
21  }) \
22  IF_TMB_PRECOMPILE( \
23  template \
24  void ATOMIC_NAME<double>(const CppAD::vector<double>& tx, \
25  CppAD::vector<double>& ty); \
26  template \
27  CppAD::vector<double> ATOMIC_NAME<double>(const CppAD::vector<double>& tx); \
28  ) \
29  template <class Type> \
30  void ATOMIC_NAME(const CppAD::vector<AD<Type> >& tx, \
31  CppAD::vector<AD<Type> >& ty); \
32  template <class Type> \
33  CppAD::vector<AD<Type> > ATOMIC_NAME(const CppAD::vector<AD<Type> >& tx); \
34  template <class Type> \
35  class atomic##ATOMIC_NAME : public CppAD::atomic_base<Type> { \
36  public: \
37  atomic##ATOMIC_NAME(const char* name) : CppAD::atomic_base<Type>(name) { \
38  atomic::atomicFunctionGenerated = true; \
39  if (config.trace.atomic) \
40  std::cout << "Constructing atomic " << #ATOMIC_NAME << "\n"; \
41  this->option(CppAD::atomic_base<Type>::bool_sparsity_enum); \
42  } \
43  \
44  private: \
45  virtual bool forward(size_t p, size_t q, const CppAD::vector<bool>& vx, \
46  CppAD::vector<bool>& vy, \
47  const CppAD::vector<Type>& tx, \
48  CppAD::vector<Type>& ty) { \
49  if (q > 0) \
50  Rf_error("Atomic '" #ATOMIC_NAME "' order not implemented.\n"); \
51  if (vx.size() > 0) { \
52  bool anyvx = false; \
53  for (size_t i = 0; i < vx.size(); i++) anyvx |= vx[i]; \
54  for (size_t i = 0; i < vy.size(); i++) vy[i] = anyvx; \
55  } \
56  ATOMIC_NAME(tx, ty); \
57  return true; \
58  } \
59  virtual bool reverse(size_t q, const CppAD::vector<Type>& tx, \
60  const CppAD::vector<Type>& ty, \
61  CppAD::vector<Type>& px, \
62  const CppAD::vector<Type>& py) { \
63  if (q > 0) \
64  Rf_error("Atomic '" #ATOMIC_NAME "' order not implemented.\n"); \
65  ATOMIC_REVERSE; \
66  return true; \
67  } \
68  virtual bool rev_sparse_jac(size_t q, const CppAD::vector<bool>& rt, \
69  CppAD::vector<bool>& st) { \
70  bool anyrt = false; \
71  for (size_t i = 0; i < rt.size(); i++) anyrt |= rt[i]; \
72  for (size_t i = 0; i < st.size(); i++) st[i] = anyrt; \
73  return true; \
74  } \
75  virtual bool rev_sparse_jac(size_t q, \
76  const CppAD::vector<std::set<size_t> >& rt, \
77  CppAD::vector<std::set<size_t> >& st) { \
78  Rf_error("Should not be called"); \
79  } \
80  }; \
81  template <class Type> \
82  void ATOMIC_NAME(const CppAD::vector<AD<Type> >& tx, \
83  CppAD::vector<AD<Type> >& ty) { \
84  static atomic##ATOMIC_NAME<Type> afun##ATOMIC_NAME( \
85  "atomic_" #ATOMIC_NAME); \
86  afun##ATOMIC_NAME(tx, ty); \
87  } \
88  template <class Type> \
89  CppAD::vector<AD<Type> > ATOMIC_NAME(const CppAD::vector<AD<Type> >& tx) { \
90  CppAD::vector<AD<Type> > ty(OUTPUT_DIM); \
91  ATOMIC_NAME(tx, ty); \
92  return ty; \
93  }
94 
95 #define TMB_ATOMIC_STATIC_FUNCTION( \
96  ATOMIC_NAME, \
97  INPUT_SIZE, \
98  ATOMIC_DOUBLE, \
99  ATOMIC_REVERSE \
100 ) \
101 template<class dummy=void> \
102 double ATOMIC_NAME (const double *tx) { \
103  double ty[1]; \
104  ATOMIC_DOUBLE; \
105  return ty[0]; \
106 } \
107 template <class Type> \
108 CppAD::vector<AD<Type> > ATOMIC_NAME(const CppAD::vector<AD<Type> >& tx);\
109 template<class Type> \
110 Type ATOMIC_NAME (const Type *tx) { \
111  CppAD::vector<Type> tx_(INPUT_SIZE); \
112  for (size_t i=0; i<INPUT_SIZE; i++) tx_[i]=tx[i]; \
113  return ATOMIC_NAME(tx_)[0]; \
114 } \
115 TMB_ATOMIC_VECTOR_FUNCTION( \
116  ATOMIC_NAME, \
117  1, \
118  ATOMIC_DOUBLE, \
119  ATOMIC_REVERSE \
120 )
121 // Helper to forward declare atomic
122 #define TMB_ATOMIC_VECTOR_FUNCTION_DECLARE(ATOMIC_NAME) \
123 template<class T> \
124 CppAD::vector<AD<T> > ATOMIC_NAME(const CppAD::vector<AD<T> > &x); \
125 template<class Double> \
126 CppAD::vector<double> ATOMIC_NAME(const CppAD::vector<Double > &x);
127 // Helper to forward define atomic
128 #define TMB_ATOMIC_VECTOR_FUNCTION_DEFINE(ATOMIC_NAME, \
129  OUTPUT_DIM, \
130  ATOMIC_DOUBLE, \
131  ATOMIC_REVERSE) \
132 TMB_ATOMIC_VECTOR_FUNCTION(ATOMIC_NAME, \
133  OUTPUT_DIM, \
134  ATOMIC_DOUBLE, \
135  ATOMIC_REVERSE)
Namespace with special functions and derivatives.
-
Definition: TMB.hpp:127
+
1 // Copyright (C) 2013-2015 Kasper Kristensen
2 // License: GPL-2
3 
4 /* Flag to detect if any atomic functions have been created */
5 TMB_EXTERN bool atomicFunctionGenerated CSKIP(= false;)
6 
8 #define TMB_ATOMIC_VECTOR_FUNCTION(ATOMIC_NAME, OUTPUT_DIM, ATOMIC_DOUBLE, \
9  ATOMIC_REVERSE) \
10  \
11  template<class Double> \
12  void ATOMIC_NAME(const CppAD::vector<Double>& tx, \
13  CppAD::vector<Double>& ty) CSKIP_ATOMIC({ \
14  ATOMIC_DOUBLE; \
15  }) \
16  template<class Double> \
17  CppAD::vector<double> \
18  ATOMIC_NAME(const CppAD::vector<Double>& tx) CSKIP_ATOMIC({ \
19  CppAD::vector<double> ty(OUTPUT_DIM); \
20  ATOMIC_NAME(tx, ty); \
21  return ty; \
22  }) \
23  IF_TMB_PRECOMPILE_ATOMICS( \
24  template \
25  void ATOMIC_NAME<double>(const CppAD::vector<double>& tx, \
26  CppAD::vector<double>& ty); \
27  template \
28  CppAD::vector<double> ATOMIC_NAME<double>(const CppAD::vector<double>& tx); \
29  ) \
30  template <class Type> \
31  void ATOMIC_NAME(const CppAD::vector<AD<Type> >& tx, \
32  CppAD::vector<AD<Type> >& ty); \
33  template <class Type> \
34  CppAD::vector<AD<Type> > ATOMIC_NAME(const CppAD::vector<AD<Type> >& tx); \
35  template <class Type> \
36  class atomic##ATOMIC_NAME : public CppAD::atomic_base<Type> { \
37  public: \
38  atomic##ATOMIC_NAME(const char* name) : CppAD::atomic_base<Type>(name) { \
39  atomic::atomicFunctionGenerated = true; \
40  if (config.trace.atomic) \
41  std::cout << "Constructing atomic " << #ATOMIC_NAME << "\n"; \
42  this->option(CppAD::atomic_base<Type>::bool_sparsity_enum); \
43  } \
44  \
45  private: \
46  virtual bool forward(size_t p, size_t q, const CppAD::vector<bool>& vx, \
47  CppAD::vector<bool>& vy, \
48  const CppAD::vector<Type>& tx, \
49  CppAD::vector<Type>& ty) { \
50  if (q > 0) \
51  Rf_error("Atomic '" #ATOMIC_NAME "' order not implemented.\n"); \
52  if (vx.size() > 0) { \
53  bool anyvx = false; \
54  for (size_t i = 0; i < vx.size(); i++) anyvx |= vx[i]; \
55  for (size_t i = 0; i < vy.size(); i++) vy[i] = anyvx; \
56  } \
57  ATOMIC_NAME(tx, ty); \
58  return true; \
59  } \
60  virtual bool reverse(size_t q, const CppAD::vector<Type>& tx, \
61  const CppAD::vector<Type>& ty, \
62  CppAD::vector<Type>& px, \
63  const CppAD::vector<Type>& py) { \
64  if (q > 0) \
65  Rf_error("Atomic '" #ATOMIC_NAME "' order not implemented.\n"); \
66  ATOMIC_REVERSE; \
67  return true; \
68  } \
69  virtual bool rev_sparse_jac(size_t q, const CppAD::vector<bool>& rt, \
70  CppAD::vector<bool>& st) { \
71  bool anyrt = false; \
72  for (size_t i = 0; i < rt.size(); i++) anyrt |= rt[i]; \
73  for (size_t i = 0; i < st.size(); i++) st[i] = anyrt; \
74  return true; \
75  } \
76  virtual bool rev_sparse_jac(size_t q, \
77  const CppAD::vector<std::set<size_t> >& rt, \
78  CppAD::vector<std::set<size_t> >& st) { \
79  Rf_error("Should not be called"); \
80  } \
81  }; \
82  template <class Type> \
83  void ATOMIC_NAME(const CppAD::vector<AD<Type> >& tx, \
84  CppAD::vector<AD<Type> >& ty) { \
85  static atomic##ATOMIC_NAME<Type> afun##ATOMIC_NAME( \
86  "atomic_" #ATOMIC_NAME); \
87  afun##ATOMIC_NAME(tx, ty); \
88  } \
89  template <class Type> \
90  CppAD::vector<AD<Type> > ATOMIC_NAME(const CppAD::vector<AD<Type> >& tx) { \
91  CppAD::vector<AD<Type> > ty(OUTPUT_DIM); \
92  ATOMIC_NAME(tx, ty); \
93  return ty; \
94  }
95 
96 #define TMB_ATOMIC_STATIC_FUNCTION( \
97  ATOMIC_NAME, \
98  INPUT_SIZE, \
99  ATOMIC_DOUBLE, \
100  ATOMIC_REVERSE \
101 ) \
102 template<class dummy=void> \
103 double ATOMIC_NAME (const double *tx) { \
104  double ty[1]; \
105  ATOMIC_DOUBLE; \
106  return ty[0]; \
107 } \
108 template <class Type> \
109 CppAD::vector<AD<Type> > ATOMIC_NAME(const CppAD::vector<AD<Type> >& tx);\
110 template<class Type> \
111 Type ATOMIC_NAME (const Type *tx) { \
112  CppAD::vector<Type> tx_(INPUT_SIZE); \
113  for (size_t i=0; i<INPUT_SIZE; i++) tx_[i]=tx[i]; \
114  return ATOMIC_NAME(tx_)[0]; \
115 } \
116 TMB_ATOMIC_VECTOR_FUNCTION( \
117  ATOMIC_NAME, \
118  1, \
119  ATOMIC_DOUBLE, \
120  ATOMIC_REVERSE \
121 )
122 // Helper to forward declare atomic
123 #define TMB_ATOMIC_VECTOR_FUNCTION_DECLARE(ATOMIC_NAME) \
124 template<class T> \
125 CppAD::vector<AD<T> > ATOMIC_NAME(const CppAD::vector<AD<T> > &x); \
126 template<class Double> \
127 CppAD::vector<double> ATOMIC_NAME(const CppAD::vector<Double > &x);
128 // Helper to forward define atomic
129 #define TMB_ATOMIC_VECTOR_FUNCTION_DEFINE(ATOMIC_NAME, \
130  OUTPUT_DIM, \
131  ATOMIC_DOUBLE, \
132  ATOMIC_REVERSE) \
133 TMB_ATOMIC_VECTOR_FUNCTION(ATOMIC_NAME, \
134  OUTPUT_DIM, \
135  ATOMIC_DOUBLE, \
136  ATOMIC_REVERSE)
Namespace with special functions and derivatives.
+
Definition: TMB.hpp:132
Type rt(Type df)
Simulate from a Student&#39;s t distribution.
#define TMB_ATOMIC_VECTOR_FUNCTION( ATOMIC_NAME, OUTPUT_DIM, ATOMIC_DOUBLE, ATOMIC_REVERSE)
Construct atomic vector function based on known derivatives.
bool atomic
Trace construction of atomic functions.
Definition: config.hpp:31
diff --git a/checkpoint_8hpp_source.html b/checkpoint_8hpp_source.html index 0c5ddb4c5..09699d658 100644 --- a/checkpoint_8hpp_source.html +++ b/checkpoint_8hpp_source.html @@ -73,7 +73,7 @@
checkpoint.hpp
-
1 #ifndef HAVE_CHECKPOINT_HPP
2 #define HAVE_CHECKPOINT_HPP
3 // Autogenerated - do not edit by hand !
4 #include <memory>
5 #include "global.hpp"
6 #include "vectorize.hpp"
7 
8 namespace TMBad {
9 
11 template <class ADFun, bool packed_ = false>
12 struct standard_derivative_table : std::vector<ADFun> {
13  static const bool packed = packed_;
15  void requireOrder(size_t n) {
16  while ((*this).size() <= n) {
17  (*this).push_back((*this).back().WgtJacFun());
18  }
19  }
21  void retape(ForwardArgs<Scalar> &args) {}
23  standard_derivative_table(const ADFun &F) : std::vector<ADFun>(1, F) {}
24 };
25 
32  static const bool packed = false;
33  std::vector<Scalar> x_prev;
34  bool operator()(const std::vector<Scalar> &x);
35 };
36 
38 template <class Functor, class ADFun, class Test = ParametersChanged,
39  bool packed_ = false>
41  Functor F;
42  Test test;
46  size_t n = (*this)[0].Domain();
47  std::vector<Scalar> x = args.x_segment(0, n);
48  bool change = test(x);
49  if (change) {
50  (*this).resize(1);
51  (*this)[0] = ADFun(F, x);
52  }
53  }
56  template <class V>
57  retaping_derivative_table(const Functor &F, const V &x, Test test = Test())
58  : standard_derivative_table<ADFun, packed_>(ADFun(F, x)),
59  F(F),
60  test(test) {}
61 };
62 
94 template <class T>
96  typedef std::shared_ptr<T> Base;
97  Base sp;
98  std::shared_ptr<std::vector<std::weak_ptr<T> > > weak_refs;
99 
100  omp_shared_ptr(const Base &x)
101  : sp(x), weak_refs(std::make_shared<std::vector<std::weak_ptr<T> > >()) {
102  (*weak_refs).resize(TMBAD_MAX_NUM_THREADS);
103  (*weak_refs)[TMBAD_THREAD_NUM] = x;
104  }
105  omp_shared_ptr(const omp_shared_ptr &other) : weak_refs(other.weak_refs) {
106  if ((*weak_refs)[TMBAD_THREAD_NUM].expired()) {
107  sp = std::make_shared<T>(*other);
108 
109  (*weak_refs)[TMBAD_THREAD_NUM] = sp;
110  } else {
111  sp = (*weak_refs)[TMBAD_THREAD_NUM].lock();
112  }
113  }
114  omp_shared_ptr() {}
115  T &operator*() const { return *sp; }
116  T *operator->() const { return sp.get(); }
117  explicit operator bool() const { return (bool)sp; }
118 };
119 
166 template <class DerivativeTable>
167 struct AtomOp : global::DynamicOperator<-1, -1> {
168  static const bool have_input_size_output_size = true;
169  static const bool add_forward_replay_copy = true;
170 
171  TMBAD_SHARED_PTR<DerivativeTable> dtab;
172 
173  int order;
174 
175  template <class T1>
176  AtomOp(const T1 &F) : dtab(std::make_shared<DerivativeTable>(F)), order(0) {}
177  template <class T1, class T2>
178  AtomOp(const T1 &F, const T2 &x)
179  : dtab(std::make_shared<DerivativeTable>(F, x)), order(0) {}
180  template <class T1, class T2, class T3>
181  AtomOp(const T1 &F, const T2 &x, const T3 &t)
182  : dtab(std::make_shared<DerivativeTable>(F, x, t)), order(0) {}
183 
184  Index input_size() const { return (*dtab)[order].Domain(); }
185  Index output_size() const { return (*dtab)[order].Range(); }
186 
187  void forward(ForwardArgs<Scalar> &args) {
188  (*dtab).retape(args);
189 
190  (*dtab).requireOrder(order);
191 
192  size_t n = input_size();
193  size_t m = output_size();
194 
195  auto x = args.x_segment(0, n);
196 
197  args.y_segment(0, m) = (*dtab)[order](x);
198  }
199 
200  void reverse(ReverseArgs<Scalar> &args) {
201  size_t n = input_size();
202  size_t m = output_size();
203 
204  auto x = args.x_segment(0, n);
205  auto w = args.dy_segment(0, m);
206 
207  args.dx_segment(0, n) += (*dtab)[order].Jacobian(x, w);
208  }
209 
210  void reverse(ReverseArgs<global::Replay> &args) {
211  size_t n = input_size();
212  size_t m = output_size();
213 
214  std::vector<global::Replay> x = args.x_segment(0, n);
215  if (DerivativeTable::packed) x = repack(x);
216  std::vector<global::Replay> w = args.dy_segment(0, m);
217  std::vector<global::Replay> xw;
218  xw.insert(xw.end(), x.begin(), x.end());
219  xw.insert(xw.end(), w.begin(), w.end());
220 
221  (*dtab).requireOrder(order + 1);
222  AtomOp cpy(*this);
223  cpy.order++;
224  args.dx_segment(0, n) += global::Complete<AtomOp>(cpy)(xw);
225  }
226 
227  template <class T>
228  void forward(ForwardArgs<T> &args) {
229  TMBAD_ASSERT(false);
230  }
231  void reverse(ReverseArgs<Writer> &args) { TMBAD_ASSERT(false); }
232 
233  const char *op_name() { return "AtomOp"; }
234 
235  void print(global::print_config cfg) {
236  Rcout << cfg.prefix;
237  Rcout << "order=" << order << " ";
238  Rcout << "(*dtab).size()=" << (*dtab).size() << " ";
239  Rcout << "dtab=" << &(*dtab) << "\n";
240  (*dtab)[order].print(cfg);
241  }
242 };
243 
252 template <class Functor>
253 struct PackWrap {
254  Functor F;
255  PackWrap(const Functor &F) : F(F) {}
258  template <class T>
259  std::vector<T> operator()(const std::vector<T> &xp) {
260  Index K = ScalarPack<SegmentRef>::size;
261  size_t n = xp.size() / K;
262  TMBAD_ASSERT2(n * K == xp.size(), "Invalid packed arguments");
263  std::vector<ad_segment> x(n);
264  for (size_t i = 0; i < n; i++) x[i] = unpack(xp, i);
265  ad_segment y = F(x);
266  ad_segment yp = pack(y);
267  std::vector<T> ans = concat(std::vector<ad_segment>(1, yp));
268  return ans;
269  }
272  bool operator()(const std::vector<Scalar> &xp) {
273  Index K = ScalarPack<SegmentRef>::size;
274  size_t n = xp.size() / K;
275  TMBAD_ASSERT2(n * K == xp.size(), "Invalid packed arguments");
276  std::vector<Scalar *> x(n);
277  for (size_t i = 0; i < n; i++) x[i] = unpack(xp, i);
278  return F(x);
279  }
280 };
281 
282 } // namespace TMBad
283 #endif // HAVE_CHECKPOINT_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+
1 #ifndef HAVE_CHECKPOINT_HPP
2 #define HAVE_CHECKPOINT_HPP
3 // Autogenerated - do not edit by hand !
4 #include <memory>
5 #include "global.hpp"
6 #include "vectorize.hpp"
7 
8 namespace TMBad {
9 
11 template <class ADFun, bool packed_ = false>
12 struct standard_derivative_table : std::vector<ADFun> {
13  static const bool packed = packed_;
15  void requireOrder(size_t n) {
16  while ((*this).size() <= n) {
17  (*this).push_back((*this).back().WgtJacFun());
18  }
19  }
21  void retape(ForwardArgs<Scalar> &args) {}
23  standard_derivative_table(const ADFun &F) : std::vector<ADFun>(1, F) {}
24 };
25 
32  static const bool packed = false;
33  std::vector<Scalar> x_prev;
34  bool operator()(const std::vector<Scalar> &x);
35 };
36 
38 template <class Functor, class ADFun, class Test = ParametersChanged,
39  bool packed_ = false>
41  Functor F;
42  Test test;
46  size_t n = (*this)[0].Domain();
47  std::vector<Scalar> x = args.x_segment(0, n);
48  bool change = test(x);
49  if (change) {
50  (*this).resize(1);
51  (*this)[0] = ADFun(F, x);
52  }
53  }
56  template <class V>
57  retaping_derivative_table(const Functor &F, const V &x, Test test = Test())
58  : standard_derivative_table<ADFun, packed_>(ADFun(F, x)),
59  F(F),
60  test(test) {}
61 };
62 
94 template <class T>
96  typedef std::shared_ptr<T> Base;
97  Base sp;
98  std::shared_ptr<std::vector<std::weak_ptr<T> > > weak_refs;
99 
100  omp_shared_ptr(const Base &x)
101  : sp(x), weak_refs(std::make_shared<std::vector<std::weak_ptr<T> > >()) {
102  (*weak_refs).resize(TMBAD_MAX_NUM_THREADS);
103  (*weak_refs)[TMBAD_THREAD_NUM] = x;
104  }
105  omp_shared_ptr(const omp_shared_ptr &other) : weak_refs(other.weak_refs) {
106  if ((*weak_refs)[TMBAD_THREAD_NUM].expired()) {
107  sp = std::make_shared<T>(*other);
108 
109  (*weak_refs)[TMBAD_THREAD_NUM] = sp;
110  } else {
111  sp = (*weak_refs)[TMBAD_THREAD_NUM].lock();
112  }
113  }
114  omp_shared_ptr() {}
115  T &operator*() const { return *sp; }
116  T *operator->() const { return sp.get(); }
117  explicit operator bool() const { return (bool)sp; }
118 };
119 
166 template <class DerivativeTable>
167 struct AtomOp : global::DynamicOperator<-1, -1> {
168  static const bool have_input_size_output_size = true;
169  static const bool add_forward_replay_copy = true;
170 
171  TMBAD_SHARED_PTR<DerivativeTable> dtab;
172 
173  int order;
174 
175  template <class T1>
176  AtomOp(const T1 &F) : dtab(std::make_shared<DerivativeTable>(F)), order(0) {}
177  template <class T1, class T2>
178  AtomOp(const T1 &F, const T2 &x)
179  : dtab(std::make_shared<DerivativeTable>(F, x)), order(0) {}
180  template <class T1, class T2, class T3>
181  AtomOp(const T1 &F, const T2 &x, const T3 &t)
182  : dtab(std::make_shared<DerivativeTable>(F, x, t)), order(0) {}
183 
184  Index input_size() const { return (*dtab)[order].Domain(); }
185  Index output_size() const { return (*dtab)[order].Range(); }
186 
187  void forward(ForwardArgs<Scalar> &args) {
188  (*dtab).retape(args);
189 
190  (*dtab).requireOrder(order);
191 
192  size_t n = input_size();
193  size_t m = output_size();
194 
195  auto x = args.x_segment(0, n);
196 
197  args.y_segment(0, m) = (*dtab)[order](x);
198  }
199 
200  void reverse(ReverseArgs<Scalar> &args) {
201  size_t n = input_size();
202  size_t m = output_size();
203 
204  auto x = args.x_segment(0, n);
205  auto w = args.dy_segment(0, m);
206 
207  args.dx_segment(0, n) += (*dtab)[order].Jacobian(x, w);
208  }
209 
210  void reverse(ReverseArgs<global::Replay> &args) {
211  size_t n = input_size();
212  size_t m = output_size();
213 
214  std::vector<global::Replay> x = args.x_segment(0, n);
215  if (DerivativeTable::packed) x = repack(x);
216  std::vector<global::Replay> w = args.dy_segment(0, m);
217  std::vector<global::Replay> xw;
218  xw.insert(xw.end(), x.begin(), x.end());
219  xw.insert(xw.end(), w.begin(), w.end());
220 
221  (*dtab).requireOrder(order + 1);
222  AtomOp cpy(*this);
223  cpy.order++;
224  args.dx_segment(0, n) += global::Complete<AtomOp>(cpy)(xw);
225  }
226 
227  template <class T>
228  void forward(ForwardArgs<T> &args) {
229  TMBAD_ASSERT(false);
230  }
231  void reverse(ReverseArgs<Writer> &args) { TMBAD_ASSERT(false); }
232 
233  const char *op_name() { return "AtomOp"; }
234 
235  void print(global::print_config cfg) {
236  Rcout << cfg.prefix;
237  Rcout << "order=" << order << " ";
238  Rcout << "(*dtab).size()=" << (*dtab).size() << " ";
239  Rcout << "dtab=" << &(*dtab) << "\n";
240  (*dtab)[order].print(cfg);
241  }
242 };
243 
252 template <class Functor>
253 struct PackWrap {
254  Functor F;
255  PackWrap(const Functor &F) : F(F) {}
258  template <class T>
259  std::vector<T> operator()(const std::vector<T> &xp) {
260  Index K = ScalarPack<SegmentRef>::size;
261  size_t n = xp.size() / K;
262  TMBAD_ASSERT2(n * K == xp.size(), "Invalid packed arguments");
263  std::vector<ad_segment> x(n);
264  for (size_t i = 0; i < n; i++) x[i] = unpack(xp, i);
265  ad_segment y = F(x);
266  ad_segment yp = pack(y);
267  std::vector<T> ans = concat(std::vector<ad_segment>(1, yp));
268  return ans;
269  }
272  bool operator()(const std::vector<Scalar> &xp) {
273  Index K = ScalarPack<SegmentRef>::size;
274  size_t n = xp.size() / K;
275  TMBAD_ASSERT2(n * K == xp.size(), "Invalid packed arguments");
276  std::vector<Scalar *> x(n);
277  for (size_t i = 0; i < n; i++) x[i] = unpack(xp, i);
278  return F(x);
279  }
280 };
281 
282 } // namespace TMBad
283 #endif // HAVE_CHECKPOINT_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
segment_ref< ReverseArgs, dx_write > dx_segment(Index from, Index size)
segment version
Definition: global.hpp:344
Vector class used by TMB.
Definition: vector.hpp:17
segment_ref< ReverseArgs, x_read > x_segment(Index from, Index size)
segment version
Definition: global.hpp:336
diff --git a/code__generator_8hpp_source.html b/code__generator_8hpp_source.html index 1c687c921..c650e988b 100644 --- a/code__generator_8hpp_source.html +++ b/code__generator_8hpp_source.html @@ -73,6 +73,6 @@
code_generator.hpp
-
1 #ifndef HAVE_CODE_GENERATOR_HPP
2 #define HAVE_CODE_GENERATOR_HPP
3 // Autogenerated - do not edit by hand !
4 #include <fstream>
5 #include <iostream>
6 #include <sstream>
7 #include "global.hpp"
8 
9 namespace TMBad {
10 
11 void searchReplace(std::string& str, const std::string& oldStr,
12  const std::string& newStr);
13 
14 struct code_config {
15  bool asm_comments;
16  bool gpu;
17  std::string indent;
18  std::string header_comment;
19  std::string float_str;
20  std::ostream* cout;
21  std::string float_ptr();
22  std::string void_str();
23  void init_code();
24  void write_header_comment();
25  code_config();
26 };
27 
28 void write_common(std::ostringstream& buffer, code_config cfg, size_t node);
29 
30 void write_forward(global& glob, code_config cfg = code_config());
31 
32 void write_reverse(global& glob, code_config cfg = code_config());
33 
34 void write_all(global glob, code_config cfg = code_config());
35 
36 } // namespace TMBad
37 #endif // HAVE_CODE_GENERATOR_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+
1 #ifndef HAVE_CODE_GENERATOR_HPP
2 #define HAVE_CODE_GENERATOR_HPP
3 // Autogenerated - do not edit by hand !
4 #include <fstream>
5 #include <iostream>
6 #include <sstream>
7 #include "global.hpp"
8 
9 namespace TMBad {
10 
11 void searchReplace(std::string& str, const std::string& oldStr,
12  const std::string& newStr);
13 
14 struct code_config {
15  bool asm_comments;
16  bool gpu;
17  std::string indent;
18  std::string header_comment;
19  std::string float_str;
20  std::ostream* cout;
21  std::string float_ptr();
22  std::string void_str();
23  void init_code();
24  void write_header_comment();
25  code_config();
26 };
27 
28 void write_common(std::ostringstream& buffer, code_config cfg, size_t node);
29 
30 void write_forward(global& glob, code_config cfg = code_config());
31 
32 void write_reverse(global& glob, code_config cfg = code_config());
33 
34 void write_all(global glob, code_config cfg = code_config());
35 
36 } // namespace TMBad
37 #endif // HAVE_CODE_GENERATOR_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
License: GPL v2 diff --git a/compile_8hpp_source.html b/compile_8hpp_source.html index 91dbede13..afb0c61db 100644 --- a/compile_8hpp_source.html +++ b/compile_8hpp_source.html @@ -73,6 +73,6 @@
compile.hpp
-
1 #ifndef _WIN32
2 #ifndef HAVE_COMPILE_HPP
3 #define HAVE_COMPILE_HPP
4 // Autogenerated - do not edit by hand !
5 #include <dlfcn.h>
6 #include <stdlib.h>
7 #include "code_generator.hpp"
8 #include "global.hpp"
9 
10 namespace TMBad {
11 
12 void compile(global &glob, code_config cfg = code_config());
13 }
14 #endif // HAVE_COMPILE_HPP
15 #endif
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+
1 #ifndef _WIN32
2 #ifndef HAVE_COMPILE_HPP
3 #define HAVE_COMPILE_HPP
4 // Autogenerated - do not edit by hand !
5 #include <dlfcn.h>
6 #include <stdlib.h>
7 #include "code_generator.hpp"
8 #include "global.hpp"
9 
10 namespace TMBad {
11 
12 void compile(global &glob, code_config cfg = code_config());
13 }
14 #endif // HAVE_COMPILE_HPP
15 #endif
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
License: GPL v2 diff --git a/compression_8hpp_source.html b/compression_8hpp_source.html index e5cc8ad5e..0a1ab7f84 100644 --- a/compression_8hpp_source.html +++ b/compression_8hpp_source.html @@ -73,7 +73,7 @@
compression.hpp
-
1 #ifndef HAVE_COMPRESSION_HPP
2 #define HAVE_COMPRESSION_HPP
3 // Autogenerated - do not edit by hand !
4 #include "global.hpp"
5 #include "graph_transform.hpp" // subset
6 #include "radix.hpp" // first_occurance
7 
8 namespace TMBad {
9 
11 struct period {
13  size_t begin;
15  size_t size;
17  size_t rep;
18 };
19 
20 std::ostream &operator<<(std::ostream &os, const period &x);
21 
40 template <class T>
41 struct periodic {
42  const std::vector<T> &x;
47  periodic(const std::vector<T> &x, size_t max_period_size,
48  size_t min_period_rep = 2)
49  : x(x),
50  max_period_size(max_period_size),
51  min_period_rep(min_period_rep) {}
56  bool test_period(size_t start, size_t p) {
57  if (start + (p - 1) + p >= x.size()) return false;
58  for (size_t i = 0; i < p; i++) {
59  if (x[start + i] != x[start + i + p]) return false;
60  }
61  return true;
62  }
68  size_t numrep_period(size_t start, size_t p) {
69  size_t n = 1;
70  while (test_period(start, p)) {
71  n++;
72  start += p;
73  }
74  return n;
75  }
87  period find_best_period(size_t start) {
88  size_t p_best = -1, rep_best = 0;
89  for (size_t p = 1; p < max_period_size; p++) {
90  size_t rep = numrep_period(start, p);
91  if (rep > rep_best) {
92  p_best = p;
93  rep_best = rep;
94  p = p * rep;
95  }
96  }
97  period ans = {start, p_best, rep_best};
98  return ans;
99  }
100  std::vector<period> find_all() {
101  std::vector<period> ans;
102  for (size_t i = 0; i < x.size();) {
103  period result = find_best_period(i);
104  if (result.rep >= min_period_rep) {
105  ans.push_back(result);
106  i += result.size * result.rep;
107  } else {
108  i++;
109  }
110  }
111  return ans;
112  }
113 };
114 
115 template <class T>
116 struct matrix_view {
117  const T *x;
118  size_t nrow, ncol;
119  matrix_view(const T *x, size_t nrow, size_t ncol)
120  : x(x), nrow(nrow), ncol(ncol) {}
121  T operator()(size_t i, size_t j) const { return x[i + j * nrow]; }
122  size_t rows() const { return nrow; }
123  size_t cols() const { return ncol; }
124  template <class Diff_T>
125  std::vector<Diff_T> row_diff(size_t i) {
126  size_t nd = (cols() >= 1 ? cols() - 1 : 0);
127  std::vector<Diff_T> xd(nd);
128  for (size_t j = 1; j < cols(); j++)
129  xd[j - 1] = (Diff_T)(*this)(i, j) - (Diff_T)(*this)(i, j - 1);
130  return xd;
131  }
132 };
133 
147 std::vector<period> split_period(global *glob, period p,
148  size_t max_period_size);
149 
150 struct compressed_input {
151  typedef std::ptrdiff_t ptrdiff_t;
152 
153  mutable std::vector<ptrdiff_t> increment_pattern;
154  std::vector<Index> which_periodic;
155  std::vector<Index> period_sizes;
156  std::vector<Index> period_offsets;
157  std::vector<ptrdiff_t> period_data;
158 
159  Index n, m;
160  Index nrep;
161  Index np;
162 
163  mutable Index counter;
164  mutable std::vector<Index> inputs;
165  std::vector<Index> input_diff;
166  size_t input_size() const;
167  void update_increment_pattern() const;
168 
169  void increment(Args<> &args) const;
170 
171  void decrement(Args<> &args) const;
172  void forward_init(Args<> &args) const;
173  void reverse_init(Args<> &args);
174  void dependencies_intervals(Args<> &args, std::vector<Index> &lower,
175  std::vector<Index> &upper) const;
176 
177  size_t max_period_size;
178 
179  bool test_period(std::vector<ptrdiff_t> &x, size_t p);
180 
181  size_t find_shortest(std::vector<ptrdiff_t> &x);
182  compressed_input();
183  compressed_input(std::vector<Index> &x, size_t offset, size_t nrow, size_t m,
184  size_t ncol, size_t max_period_size);
185 };
186 
187 template <class T1, class T2>
188 struct compare_types {
189  const static bool equal = false;
190 };
191 template <class T>
192 struct compare_types<T, T> {
193  const static bool equal = true;
194 };
195 
196 void compress(global &glob, size_t max_period_size);
197 struct StackOp : global::SharedDynamicOperator {
198  typedef std::ptrdiff_t ptrdiff_t;
199  global::operation_stack opstack;
200  compressed_input ci;
201  StackOp(global *glob, period p, IndexPair ptr, size_t max_period_size);
203  StackOp(const StackOp &x);
204  void print(global::print_config cfg);
205  Index input_size() const;
206  Index output_size() const;
207  static const bool have_input_size_output_size = true;
214  template <class Type>
215  void forward(ForwardArgs<Type> args) {
216  ci.forward_init(args);
217 
218  size_t opstack_size = opstack.size();
219  for (size_t i = 0; i < ci.nrep; i++) {
220  for (size_t j = 0; j < opstack_size; j++) {
221  opstack[j]->forward_incr(args);
222  }
223  ci.increment(args);
224  }
225  if (compare_types<Type, Replay>::equal) {
226  compress(*get_glob(), ci.max_period_size);
227  }
228  }
229  void forward(ForwardArgs<Writer> &args);
236  template <class Type>
237  void reverse(ReverseArgs<Type> args) {
238  ci.reverse_init(args);
239  size_t opstack_size = opstack.size();
240  for (size_t i = 0; i < ci.nrep; i++) {
241  ci.decrement(args);
242 
243  for (size_t j = opstack_size; j > 0;) {
244  j--;
245  opstack[j]->reverse_decr(args);
246  }
247  }
248  if (compare_types<Type, Replay>::equal) {
249  compress(*get_glob(), ci.max_period_size);
250  }
251  }
252  void reverse(ReverseArgs<Writer> &args);
257  void dependencies(Args<> args, Dependencies &dep) const;
259  static const bool have_dependencies = true;
261  static const bool implicit_dependencies = true;
263  static const bool allow_remap = false;
264  const char *op_name();
265 };
266 
267 template <class T>
268 void trim(std::vector<T> &v, const T &elt) {
269  v.erase(std::remove(v.begin(), v.end(), elt), v.end());
270 }
271 
272 template <class T>
273 struct toposort_remap {
274  std::vector<T> &remap;
275  T i;
276  toposort_remap(std::vector<T> &remap, T i) : remap(remap), i(i) {}
277  void operator()(Index k) {
278  if (remap[k] >= remap[i]) {
279  remap[i] = i;
280  }
281  }
282 };
283 
308 void reorder_sub_expressions(global &glob);
309 
310 template <class T>
311 struct temporaries_remap {
312  std::vector<T> &remap;
313  T i;
314  temporaries_remap(std::vector<T> &remap, T i) : remap(remap), i(i) {}
315  void operator()(Index k) {
316  if (remap[k] == T(-1)) {
317  if (i > k + 1) remap[k] = i;
318  return;
319  }
320 
321  remap[k] = k;
322  }
323 };
324 
332 void reorder_temporaries(global &glob);
333 
334 template <class T>
335 struct dfs_add_to_stack {
336  std::vector<T> &stack;
337  std::vector<bool> &visited;
338  std::vector<T> &v2o;
339  dfs_add_to_stack(std::vector<T> &stack, std::vector<bool> &visited,
340  std::vector<T> &v2o)
341  : stack(stack), visited(visited), v2o(v2o) {}
342  void operator()(T var) {
343  Index op = v2o[var];
344  if (!visited[op]) {
345  stack.push_back(op);
346  visited[op] = true;
347  }
348  }
349 };
350 
354 void reorder_depth_first(global &glob);
355 
356 void compress(global &glob, size_t max_period_size = 1024);
357 
358 } // namespace TMBad
359 #endif // HAVE_COMPRESSION_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+
1 #ifndef HAVE_COMPRESSION_HPP
2 #define HAVE_COMPRESSION_HPP
3 // Autogenerated - do not edit by hand !
4 #include "global.hpp"
5 #include "graph_transform.hpp" // subset
6 #include "radix.hpp" // first_occurance
7 
8 namespace TMBad {
9 
11 struct period {
13  size_t begin;
15  size_t size;
17  size_t rep;
18 };
19 
20 std::ostream &operator<<(std::ostream &os, const period &x);
21 
40 template <class T>
41 struct periodic {
42  const std::vector<T> &x;
47  periodic(const std::vector<T> &x, size_t max_period_size,
48  size_t min_period_rep = 2)
49  : x(x),
50  max_period_size(max_period_size),
51  min_period_rep(min_period_rep) {}
56  bool test_period(size_t start, size_t p) {
57  if (start + (p - 1) + p >= x.size()) return false;
58  for (size_t i = 0; i < p; i++) {
59  if (x[start + i] != x[start + i + p]) return false;
60  }
61  return true;
62  }
68  size_t numrep_period(size_t start, size_t p) {
69  size_t n = 1;
70  while (test_period(start, p)) {
71  n++;
72  start += p;
73  }
74  return n;
75  }
87  period find_best_period(size_t start) {
88  size_t p_best = -1, rep_best = 0;
89  for (size_t p = 1; p < max_period_size; p++) {
90  size_t rep = numrep_period(start, p);
91  if (rep > rep_best) {
92  p_best = p;
93  rep_best = rep;
94  p = p * rep;
95  }
96  }
97  period ans = {start, p_best, rep_best};
98  return ans;
99  }
100  std::vector<period> find_all() {
101  std::vector<period> ans;
102  for (size_t i = 0; i < x.size();) {
103  period result = find_best_period(i);
104  if (result.rep >= min_period_rep) {
105  ans.push_back(result);
106  i += result.size * result.rep;
107  } else {
108  i++;
109  }
110  }
111  return ans;
112  }
113 };
114 
115 template <class T>
116 struct matrix_view {
117  const T *x;
118  size_t nrow, ncol;
119  matrix_view(const T *x, size_t nrow, size_t ncol)
120  : x(x), nrow(nrow), ncol(ncol) {}
121  T operator()(size_t i, size_t j) const { return x[i + j * nrow]; }
122  size_t rows() const { return nrow; }
123  size_t cols() const { return ncol; }
124  template <class Diff_T>
125  std::vector<Diff_T> row_diff(size_t i) {
126  size_t nd = (cols() >= 1 ? cols() - 1 : 0);
127  std::vector<Diff_T> xd(nd);
128  for (size_t j = 1; j < cols(); j++)
129  xd[j - 1] = (Diff_T)(*this)(i, j) - (Diff_T)(*this)(i, j - 1);
130  return xd;
131  }
132 };
133 
147 std::vector<period> split_period(global *glob, period p,
148  size_t max_period_size);
149 
150 struct compressed_input {
151  typedef std::ptrdiff_t ptrdiff_t;
152 
153  mutable std::vector<ptrdiff_t> increment_pattern;
154  std::vector<Index> which_periodic;
155  std::vector<Index> period_sizes;
156  std::vector<Index> period_offsets;
157  std::vector<ptrdiff_t> period_data;
158 
159  Index n, m;
160  Index nrep;
161  Index np;
162 
163  mutable Index counter;
164  mutable std::vector<Index> inputs;
165  std::vector<Index> input_diff;
166  size_t input_size() const;
167  void update_increment_pattern() const;
168 
169  void increment(Args<> &args) const;
170 
171  void decrement(Args<> &args) const;
172  void forward_init(Args<> &args) const;
173  void reverse_init(Args<> &args);
174  void dependencies_intervals(Args<> &args, std::vector<Index> &lower,
175  std::vector<Index> &upper) const;
176 
177  size_t max_period_size;
178 
179  bool test_period(std::vector<ptrdiff_t> &x, size_t p);
180 
181  size_t find_shortest(std::vector<ptrdiff_t> &x);
182  compressed_input();
183  compressed_input(std::vector<Index> &x, size_t offset, size_t nrow, size_t m,
184  size_t ncol, size_t max_period_size);
185 };
186 
187 template <class T1, class T2>
188 struct compare_types {
189  const static bool equal = false;
190 };
191 template <class T>
192 struct compare_types<T, T> {
193  const static bool equal = true;
194 };
195 
196 void compress(global &glob, size_t max_period_size);
197 struct StackOp : global::SharedDynamicOperator {
198  typedef std::ptrdiff_t ptrdiff_t;
199  global::operation_stack opstack;
200  compressed_input ci;
201  StackOp(global *glob, period p, IndexPair ptr, size_t max_period_size);
203  StackOp(const StackOp &x);
204  void print(global::print_config cfg);
205  Index input_size() const;
206  Index output_size() const;
207  static const bool have_input_size_output_size = true;
214  template <class Type>
215  void forward(ForwardArgs<Type> args) {
216  ci.forward_init(args);
217 
218  size_t opstack_size = opstack.size();
219  for (size_t i = 0; i < ci.nrep; i++) {
220  for (size_t j = 0; j < opstack_size; j++) {
221  opstack[j]->forward_incr(args);
222  }
223  ci.increment(args);
224  }
225  if (compare_types<Type, Replay>::equal) {
226  compress(*get_glob(), ci.max_period_size);
227  }
228  }
229  void forward(ForwardArgs<Writer> &args);
236  template <class Type>
237  void reverse(ReverseArgs<Type> args) {
238  ci.reverse_init(args);
239  size_t opstack_size = opstack.size();
240  for (size_t i = 0; i < ci.nrep; i++) {
241  ci.decrement(args);
242 
243  for (size_t j = opstack_size; j > 0;) {
244  j--;
245  opstack[j]->reverse_decr(args);
246  }
247  }
248  if (compare_types<Type, Replay>::equal) {
249  compress(*get_glob(), ci.max_period_size);
250  }
251  }
252  void reverse(ReverseArgs<Writer> &args);
257  void dependencies(Args<> args, Dependencies &dep) const;
259  static const bool have_dependencies = true;
261  static const bool implicit_dependencies = true;
263  static const bool allow_remap = false;
264  const char *op_name();
265 };
266 
267 template <class T>
268 void trim(std::vector<T> &v, const T &elt) {
269  v.erase(std::remove(v.begin(), v.end(), elt), v.end());
270 }
271 
272 template <class T>
273 struct toposort_remap {
274  std::vector<T> &remap;
275  T i;
276  toposort_remap(std::vector<T> &remap, T i) : remap(remap), i(i) {}
277  void operator()(Index k) {
278  if (remap[k] >= remap[i]) {
279  remap[i] = i;
280  }
281  }
282 };
283 
308 void reorder_sub_expressions(global &glob);
309 
310 template <class T>
311 struct temporaries_remap {
312  std::vector<T> &remap;
313  T i;
314  temporaries_remap(std::vector<T> &remap, T i) : remap(remap), i(i) {}
315  void operator()(Index k) {
316  if (remap[k] == T(-1)) {
317  if (i > k + 1) remap[k] = i;
318  return;
319  }
320 
321  remap[k] = k;
322  }
323 };
324 
332 void reorder_temporaries(global &glob);
333 
334 template <class T>
335 struct dfs_add_to_stack {
336  std::vector<T> &stack;
337  std::vector<bool> &visited;
338  std::vector<T> &v2o;
339  dfs_add_to_stack(std::vector<T> &stack, std::vector<bool> &visited,
340  std::vector<T> &v2o)
341  : stack(stack), visited(visited), v2o(v2o) {}
342  void operator()(T var) {
343  Index op = v2o[var];
344  if (!visited[op]) {
345  stack.push_back(op);
346  visited[op] = true;
347  }
348  }
349 };
350 
354 void reorder_depth_first(global &glob);
355 
356 void compress(global &glob, size_t max_period_size = 1024);
357 
358 } // namespace TMBad
359 #endif // HAVE_COMPRESSION_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
void reorder_temporaries(global &glob)
Re-order computational graph to make it more compressible.
Definition: TMBad.cpp:567
size_t rep
Number of consecutive period replicates.
Definition: compression.hpp:17
size_t size
Size of the period.
Definition: compression.hpp:15
diff --git a/eigen__numtraits_8hpp_source.html b/eigen__numtraits_8hpp_source.html index c7245adca..7f5cd2f6d 100644 --- a/eigen__numtraits_8hpp_source.html +++ b/eigen__numtraits_8hpp_source.html @@ -73,7 +73,7 @@
eigen_numtraits.hpp
-
1 #ifndef HAVE_EIGEN_NUMTRAITS_HPP
2 #define HAVE_EIGEN_NUMTRAITS_HPP
3 // Autogenerated - do not edit by hand !
4 #include <Eigen/Core>
5 #include "global.hpp"
6 
// Eigen trait specializations for the TMBad AD types.
7 namespace Eigen {
8 
// NumTraits for TMBad::ad_aug: inherits everything from
// NumTraits<TMBad::Scalar> and overrides the Real, NonInteger and Nested
// typedefs so that they all name ad_aug itself.
9 template <>
10 struct NumTraits<TMBad::ad_aug> : NumTraits<TMBad::Scalar> {
11  typedef TMBad::ad_aug Real;
12  typedef TMBad::ad_aug NonInteger;
13  typedef TMBad::ad_aug Nested;
14 };
15 
// Same pattern for TMBad::ad_adapt.
16 template <>
17 struct NumTraits<TMBad::ad_adapt> : NumTraits<TMBad::Scalar> {
18  typedef TMBad::ad_adapt Real;
19  typedef TMBad::ad_adapt NonInteger;
20  typedef TMBad::ad_adapt Nested;
21 };
22 
// Result type of a binary operation mixing ad_aug and Scalar (in either
// argument order) is declared to be ad_aug, for any BinOp.
23 template <typename BinOp>
24 struct ScalarBinaryOpTraits<TMBad::ad_aug, TMBad::Scalar, BinOp> {
25  typedef TMBad::ad_aug ReturnType;
26 };
27 template <typename BinOp>
28 struct ScalarBinaryOpTraits<TMBad::Scalar, TMBad::ad_aug, BinOp> {
29  typedef TMBad::ad_aug ReturnType;
30 };
31 
32 } // namespace Eigen
33 #endif // HAVE_EIGEN_NUMTRAITS_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+
1 #ifndef HAVE_EIGEN_NUMTRAITS_HPP
2 #define HAVE_EIGEN_NUMTRAITS_HPP
3 // Autogenerated - do not edit by hand !
4 #include <Eigen/Core>
5 #include "global.hpp"
6 
// Eigen trait specializations for the TMBad AD types.
7 namespace Eigen {
8 
// NumTraits for TMBad::ad_aug: inherits everything from
// NumTraits<TMBad::Scalar> and overrides the Real, NonInteger and Nested
// typedefs so that they all name ad_aug itself.
9 template <>
10 struct NumTraits<TMBad::ad_aug> : NumTraits<TMBad::Scalar> {
11  typedef TMBad::ad_aug Real;
12  typedef TMBad::ad_aug NonInteger;
13  typedef TMBad::ad_aug Nested;
14 };
15 
// Same pattern for TMBad::ad_adapt.
16 template <>
17 struct NumTraits<TMBad::ad_adapt> : NumTraits<TMBad::Scalar> {
18  typedef TMBad::ad_adapt Real;
19  typedef TMBad::ad_adapt NonInteger;
20  typedef TMBad::ad_adapt Nested;
21 };
22 
// Result type of a binary operation mixing ad_aug and Scalar (in either
// argument order) is declared to be ad_aug, for any BinOp.
23 template <typename BinOp>
24 struct ScalarBinaryOpTraits<TMBad::ad_aug, TMBad::Scalar, BinOp> {
25  typedef TMBad::ad_aug ReturnType;
26 };
27 template <typename BinOp>
28 struct ScalarBinaryOpTraits<TMBad::Scalar, TMBad::ad_aug, BinOp> {
29  typedef TMBad::ad_aug ReturnType;
30 };
31 
32 } // namespace Eigen
33 #endif // HAVE_EIGEN_NUMTRAITS_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
Enable weak comparison operators of an ad type.
Definition: global.hpp:2969
Augmented AD type.
Definition: global.hpp:2831
diff --git a/global_8hpp_source.html b/global_8hpp_source.html index ee982fb4b..92716f066 100644 --- a/global_8hpp_source.html +++ b/global_8hpp_source.html @@ -73,7 +73,7 @@
global.hpp
-
1 #ifndef HAVE_GLOBAL_HPP
2 #define HAVE_GLOBAL_HPP
3 // Autogenerated - do not edit by hand !
4 #include <algorithm>
5 #include <cmath>
6 #include <ctime>
7 #include <iomanip>
8 #include <iostream>
9 #include <limits>
10 #include <set>
11 #include <sstream>
12 #include <valarray>
13 #include <vector>
14 #include "config.hpp"
15 #include "radix.hpp"
16 
20 namespace TMBad {
21 
22 typedef TMBAD_HASH_TYPE hash_t;
23 typedef TMBAD_INDEX_TYPE Index;
24 typedef TMBAD_SCALAR_TYPE Scalar;
25 typedef std::pair<Index, Index> IndexPair;
26 typedef TMBAD_INDEX_VECTOR IndexVector;
27 
28 struct global;
31 global *get_glob();
32 
/** \brief Stream a vector as `{a, b, c}`.

    Elements are separated by ", "; an empty vector prints as `{}`.
*/
template <class T>
std::ostream &operator<<(std::ostream &out, const std::vector<T> &v) {
  out << "{";
  typedef typename std::vector<T>::const_iterator const_iter;
  for (const_iter it = v.begin(); it != v.end(); ++it) {
    // The separator goes in front of every element except the first.
    if (it != v.begin()) out << ", ";
    out << *it;
  }
  out << "}";
  return out;
}
44 
46 template <class T>
47 struct intervals {
48  struct ep : std::pair<T, bool> {
49  bool left() const { return !this->second; }
50  ep(T x, bool type) : std::pair<T, bool>(x, type) {}
51  operator T() { return this->first; }
52  };
53  std::set<ep> x;
54  typedef typename std::set<ep>::iterator iterator;
58  bool insert(T a, T b) {
59  ep x1(a, false);
60  ep x2(b, true);
61  iterator it1 = x.upper_bound(x1);
62  iterator it2 = x.lower_bound(x2);
63 
64  bool insert_x1 = (it1 == x.end()) || it1->left();
65  bool insert_x2 = (it2 == x.end()) || it2->left();
66 
67  bool change = (it1 != it2) || insert_x1;
68 
69  if (it1 != it2) {
70  x.erase(it1, it2);
71  }
72 
73  if (insert_x1) x.insert(x1);
74  if (insert_x2) x.insert(x2);
75  return change;
76  }
78  template <class F>
79  F &apply(F &f) const {
80  for (iterator it = x.begin(); it != x.end();) {
81  ep a = *it;
82  ++it;
83  ep b = *it;
84  ++it;
85  f(a, b);
86  }
87  return f;
88  }
89  struct print_interval {
90  void operator()(T a, T b) { Rcout << "[ " << a << " , " << b << " ] "; }
91  };
92  void print() {
93  print_interval f;
94  this->apply(f);
95  Rcout << "\n";
96  }
97 };
98 
99 struct Dependencies : std::vector<Index> {
100  typedef std::vector<Index> Base;
101  std::vector<std::pair<Index, Index> > I;
102  Dependencies();
103  void clear();
104  void add_interval(Index a, Index b);
105  void add_segment(Index start, Index size);
106 
107  void monotone_transform_inplace(const std::vector<Index> &x);
108 
109  template <class F>
110  F &apply(F &f) {
111  for (size_t i = 0; i < this->size(); i++) f((*this)[i]);
112  for (size_t i = 0; i < I.size(); i++) {
113  for (Index j = I[i].first; j <= I[i].second; j++) {
114  f(j);
115  }
116  }
117  return f;
118  }
119 
120  template <class F>
121  F &apply_if_not_visited(F &f, intervals<Index> &visited) {
122  for (size_t i = 0; i < this->size(); i++) f((*this)[i]);
123  for (size_t i = 0; i < I.size(); i++) {
124  if (visited.insert(I[i].first, I[i].second)) {
125  for (Index j = I[i].first; j <= I[i].second; j++) {
126  f(j);
127  }
128  }
129  }
130  return f;
131  }
132 
133  bool any(const std::vector<bool> &x) const;
134 };
135 
138 enum ArrayAccess { x_read, y_read, y_write, dx_read, dx_write, dy_read };
139 template <class Args, ArrayAccess What>
140 struct Accessor {};
141 template <class Args>
142 struct Accessor<Args, x_read> {
143  typename Args::value_type operator()(const Args &args, Index j) const {
144  return args.x(j);
145  }
146 };
147 template <class Args>
148 struct Accessor<Args, y_read> {
149  typename Args::value_type operator()(const Args &args, Index j) const {
150  return args.y(j);
151  }
152 };
153 template <class Args>
154 struct Accessor<Args, y_write> {
155  typename Args::value_type &operator()(Args &args, Index j) {
156  return args.y(j);
157  }
158 };
159 template <class Args>
160 struct Accessor<Args, dx_read> {
161  typename Args::value_type operator()(const Args &args, Index j) const {
162  return args.dx(j);
163  }
164 };
165 template <class Args>
166 struct Accessor<Args, dx_write> {
167  typename Args::value_type &operator()(Args &args, Index j) {
168  return args.dx(j);
169  }
170 };
171 template <class Args>
172 struct Accessor<Args, dy_read> {
173  typename Args::value_type operator()(const Args &args, Index j) const {
174  return args.dy(j);
175  }
176 };
177 
183 template <class T>
185  const std::vector<T> &x;
186  const std::vector<Index> &i;
187  IndirectAccessor(const std::vector<T> &x, const std::vector<Index> &i)
188  : x(x), i(i) {}
189  T operator[](size_t j) const { return x[i[j]]; }
190  size_t size() const { return i.size(); }
191  operator std::vector<T>() const {
192  std::vector<T> ans(i.size());
193  for (size_t j = 0; j < ans.size(); j++) ans[j] = (*this)[j];
194  return ans;
195  }
196 };
197 
205 template <class Args, ArrayAccess What>
206 struct segment_ref {
207  typedef typename Args::value_type Type;
208  Accessor<Args, What> element_access;
209  Args args;
210  Index from, n;
211  segment_ref(const Args &args, Index from, Index n)
212  : args(args), from(from), n(n) {}
213  template <class Other>
214  operator Other() {
215  Other ans(n);
216  for (size_t i = 0; i < n; i++) {
217  ans[i] = element_access(args, from + i);
218  }
219  return ans;
220  }
221  Type operator[](Index i) const { return element_access(args, from + i); }
222  size_t size() const { return n; }
223  template <class Other>
224  segment_ref &operator=(const Other &other) {
225  for (size_t i = 0; i < n; i++) {
226  element_access(args, from + i) = other[i];
227  }
228  return *this;
229  }
230  template <class Other>
231  segment_ref &operator+=(const Other &other) {
232  for (size_t i = 0; i < n; i++) {
233  element_access(args, from + i) += other[i];
234  }
235  return *this;
236  }
237  template <class Other>
238  segment_ref &operator-=(const Other &other) {
239  for (size_t i = 0; i < n; i++) {
240  element_access(args, from + i) -= other[i];
241  }
242  return *this;
243  }
244 };
245 
255 template <class dummy = void>
256 struct Args {
258  const Index *inputs;
263  IndexPair ptr;
265  Index input(Index j) const { return inputs[ptr.first + j]; }
267  Index output(Index j) const { return ptr.second + j; }
268  Args(const IndexVector &inputs) : inputs(inputs.data()) {
269  ptr.first = 0;
270  ptr.second = 0;
271  }
272 };
278 template <class Type>
279 struct ForwardArgs : Args<> {
280  typedef std::vector<Type> TypeVector;
281  typedef Type value_type;
282  Type *values;
283  global *glob_ptr;
285  Type x(Index j) const { return values[input(j)]; }
287  Type &y(Index j) { return values[output(j)]; }
289  Type *x_ptr(Index j) { return &values[input(j)]; }
291  Type *y_ptr(Index j) { return &values[output(j)]; }
293  segment_ref<ForwardArgs, x_read> x_segment(Index from, Index size) {
294  return segment_ref<ForwardArgs, x_read>(*this, from, size);
295  }
298  return segment_ref<ForwardArgs, y_write>(*this, from, size);
299  }
300  ForwardArgs(const IndexVector &inputs, TypeVector &values,
301  global *glob_ptr = NULL)
302  : Args<>(inputs), values(values.data()), glob_ptr(glob_ptr) {}
303 };
310 template <class Type>
311 struct ReverseArgs : Args<> {
312  typedef std::vector<Type> TypeVector;
313  typedef Type value_type;
314  Type *values;
315  Type *derivs;
316  global *glob_ptr;
318  Type x(Index j) const { return values[input(j)]; }
320  Type y(Index j) const { return values[output(j)]; }
323  Type &dx(Index j) { return derivs[input(j)]; }
326  Type dy(Index j) const { return derivs[output(j)]; }
328  Type *x_ptr(Index j) { return &values[input(j)]; }
330  Type *y_ptr(Index j) { return &values[output(j)]; }
332  Type *dx_ptr(Index j) { return &derivs[input(j)]; }
334  Type *dy_ptr(Index j) { return &derivs[output(j)]; }
336  segment_ref<ReverseArgs, x_read> x_segment(Index from, Index size) {
337  return segment_ref<ReverseArgs, x_read>(*this, from, size);
338  }
340  segment_ref<ReverseArgs, y_read> y_segment(Index from, Index size) {
341  return segment_ref<ReverseArgs, y_read>(*this, from, size);
342  }
345  return segment_ref<ReverseArgs, dx_write>(*this, from, size);
346  }
349  return segment_ref<ReverseArgs, dy_read>(*this, from, size);
350  }
351  ReverseArgs(const IndexVector &inputs, TypeVector &values, TypeVector &derivs,
352  global *glob_ptr = NULL)
353  : Args<>(inputs),
354  values(values.data()),
355  derivs(derivs.data()),
356  glob_ptr(glob_ptr) {
357  ptr.first = (Index)inputs.size();
358  ptr.second = (Index)values.size();
359  }
360 };
361 
362 template <>
363 struct ForwardArgs<bool> : Args<> {
364  typedef std::vector<bool> BoolVector;
365  BoolVector &values;
366  intervals<Index> &marked_intervals;
367  bool x(Index j) { return values[input(j)]; }
368  BoolVector::reference y(Index j) { return values[output(j)]; }
369  ForwardArgs(const IndexVector &inputs, BoolVector &values,
370  intervals<Index> &marked_intervals)
371  : Args<>(inputs), values(values), marked_intervals(marked_intervals) {}
373  template <class Operator>
374  bool any_marked_input(const Operator &op) {
375  if (Operator::implicit_dependencies) {
376  Dependencies dep;
377  op.dependencies(*this, dep);
378  return dep.any(values);
379  } else {
380  Index ninput = op.input_size();
381  for (Index j = 0; j < ninput; j++)
382  if (x(j)) return true;
383  }
384  return false;
385  }
387  template <class Operator>
388  void mark_all_output(const Operator &op) {
389  if (Operator::updating && op.output_size() == 0) {
390  Dependencies dep;
391  op.dependencies_updating(*this, dep);
392 
393  for (size_t i = 0; i < dep.size(); i++) values[dep[i]] = true;
394 
395  for (size_t i = 0; i < dep.I.size(); i++) {
396  Index a = dep.I[i].first;
397  Index b = dep.I[i].second;
398  bool insert = marked_intervals.insert(a, b);
399  if (insert) {
400  for (Index j = a; j <= b; j++) {
401  values[j] = true;
402  }
403  }
404  }
405  } else {
406  Index noutput = op.output_size();
407  for (Index j = 0; j < noutput; j++) y(j) = true;
408  }
409  }
411  template <class Operator>
412  bool mark_dense(const Operator &op) {
413  if (any_marked_input(op)) {
414  mark_all_output(op);
415  return true;
416  }
417  return false;
418  }
419 };
420 
421 template <>
422 struct ReverseArgs<bool> : Args<> {
423  typedef std::vector<bool> BoolVector;
424  BoolVector &values;
425  intervals<Index> &marked_intervals;
426  BoolVector::reference x(Index j) { return values[input(j)]; }
427  bool y(Index j) { return values[output(j)]; }
428  ReverseArgs(IndexVector &inputs, BoolVector &values,
429  intervals<Index> &marked_intervals)
430  : Args<>(inputs), values(values), marked_intervals(marked_intervals) {
431  ptr.first = (Index)inputs.size();
432  ptr.second = (Index)values.size();
433  }
435  template <class Operator>
436  bool any_marked_output(const Operator &op) {
437  if (Operator::elimination_protected) return true;
438  if (Operator::updating && op.output_size() == 0) {
439  Dependencies dep;
440  op.dependencies_updating(*this, dep);
441  return dep.any(values);
442  } else {
443  Index noutput = op.output_size();
444  for (Index j = 0; j < noutput; j++)
445  if (y(j)) return true;
446  }
447  return false;
448  }
450  template <class Operator>
451  void mark_all_input(const Operator &op) {
452  if (Operator::implicit_dependencies) {
453  Dependencies dep;
454  op.dependencies(*this, dep);
455 
456  for (size_t i = 0; i < dep.size(); i++) values[dep[i]] = true;
457 
458  for (size_t i = 0; i < dep.I.size(); i++) {
459  Index a = dep.I[i].first;
460  Index b = dep.I[i].second;
461  bool insert = marked_intervals.insert(a, b);
462  if (insert) {
463  for (Index j = a; j <= b; j++) {
464  values[j] = true;
465  }
466  }
467  }
468  } else {
469  Index ninput = op.input_size();
470  for (Index j = 0; j < ninput; j++) x(j) = true;
471  }
472  }
474  template <class Operator>
475  bool mark_dense(const Operator &op) {
476  if (any_marked_output(op)) {
477  mark_all_input(op);
478  return true;
479  }
480  return false;
481  }
482 };
483 
484 std::string tostr(const Index &x);
485 
486 std::string tostr(const Scalar &x);
487 
488 struct Writer : std::string {
489  static std::ostream *cout;
490  Writer(std::string str);
491  Writer(Scalar x);
492  Writer();
493 
494  template <class V>
495  std::string vinit(const V &x) {
496  std::string y = "{";
497  for (size_t i = 0; i < x.size(); i++)
498  y = y + (i == 0 ? "" : ",") + tostr(x[i]);
499  y = y + "}";
500  return y;
501  }
502 
503  std::string p(std::string x);
504  Writer operator+(const Writer &other);
505  Writer operator-(const Writer &other);
506  Writer operator-();
507  Writer operator*(const Writer &other);
508  Writer operator/(const Writer &other);
509 
510  Writer operator*(const Scalar &other);
511  Writer operator+(const Scalar &other);
512 
513  void operator=(const Writer &other);
514  void operator+=(const Writer &other);
515  void operator-=(const Writer &other);
516  void operator*=(const Writer &other);
517  void operator/=(const Writer &other);
518 
519  template <class T>
520  friend Writer &operator<<(Writer &w, const T &v) {
521  *cout << v;
522  return w;
523  }
524  template <class T>
525  friend Writer &operator<<(Writer &w, const std::valarray<T> &x) {
526  *cout << w.vinit(x);
527  return w;
528  }
529 };
530 
531 template <>
532 struct ForwardArgs<Writer> : ForwardArgs<Scalar> {
533  typedef std::vector<Scalar> ScalarVector;
534  typedef ForwardArgs<Scalar> Base;
536  bool const_literals;
538  bool indirect;
539  void set_indirect() {
540  indirect = true;
541  ptr.first = 0;
542  ptr.second = 0;
543  }
544  Writer xd(Index j) { return "v[" + tostr(input(j)) + "]"; }
545  Writer yd(Index j) { return "v[" + tostr(output(j)) + "]"; }
546  Writer xi(Index j) { return "v[i[" + tostr(Index(ptr.first + j)) + "]]"; }
547  Writer yi(Index j) { return "v[o[" + tostr(Index(ptr.second + j)) + "]]"; }
548  Writer x(Index j) { return (indirect ? xi(j) : xd(j)); }
549  Writer y(Index j) { return (indirect ? yi(j) : yd(j)); }
550  Writer y_const(Index j) {
551  TMBAD_ASSERT2(!indirect, "Attempt to write constants within loop?");
552  return tostr(Base::y(j));
553  }
554  ForwardArgs(IndexVector &inputs, ScalarVector &values)
555  : ForwardArgs<Scalar>(inputs, values) {
556  const_literals = false;
557  indirect = false;
558  }
559 };
560 
561 template <>
562 struct ReverseArgs<Writer> : Args<> {
563  typedef std::vector<Scalar> ScalarVector;
565  bool const_literals;
567  bool indirect;
568  void set_indirect() {
569  indirect = true;
570  ptr.first = 0;
571  ptr.second = 0;
572  }
573  Writer dxd(Index j) { return "d[" + tostr(input(j)) + "]"; }
574  Writer dyd(Index j) { return "d[" + tostr(output(j)) + "]"; }
575  Writer xd(Index j) { return "v[" + tostr(input(j)) + "]"; }
576  Writer yd(Index j) { return "v[" + tostr(output(j)) + "]"; }
577  Writer dxi(Index j) { return "d[i[" + tostr(Index(ptr.first + j)) + "]]"; }
578  Writer dyi(Index j) { return "d[o[" + tostr(Index(ptr.second + j)) + "]]"; }
579  Writer xi(Index j) { return "v[i[" + tostr(Index(ptr.first + j)) + "]]"; }
580  Writer yi(Index j) { return "v[o[" + tostr(Index(ptr.second + j)) + "]]"; }
581  Writer x(Index j) { return (indirect ? xi(j) : xd(j)); }
582  Writer y(Index j) { return (indirect ? yi(j) : yd(j)); }
583  Writer dx(Index j) { return (indirect ? dxi(j) : dxd(j)); }
584  Writer dy(Index j) { return (indirect ? dyi(j) : dyd(j)); }
585 
586  ReverseArgs(IndexVector &inputs, ScalarVector &values) : Args<>(inputs) {
587  const_literals = false;
588  indirect = false;
589  ptr.first = (Index)inputs.size();
590  ptr.second = (Index)values.size();
591  }
592 };
593 
// Position within an operation sequence: an operator index (`node`) together
// with an IndexPair (`ptr`). The three-argument constructor presumably
// stores `first`/`second` in `ptr` - defined out of line, confirm there.
594 struct Position {
595  Position(Index node, Index first, Index second);
596  Position();
597  Index node;
598  IndexPair ptr;
// Ordering of positions; defined out of line.
599  bool operator<(const Position &other) const;
600 };
601 
/** \brief Sort `x` in ascending order (`operator<`), in place. */
template <class T>
void sort_inplace(std::vector<T> &x) {
  typename std::vector<T>::iterator first = x.begin();
  typename std::vector<T>::iterator last = x.end();
  std::sort(first, last);
}
607 
/** \brief Sort `x` in ascending order and drop duplicates, in place. */
template <class T>
void sort_unique_inplace(std::vector<T> &x) {
  std::sort(x.begin(), x.end());
  // After sorting, equal elements are adjacent, so unique() removes them all.
  x.erase(std::unique(x.begin(), x.end()), x.end());
}
615 
// Graph over operator nodes. All member functions are defined out of line,
// so the notes below are limited to what the declarations show.
617 struct graph {
// NOTE(review): `j` / `p` look like a compressed adjacency layout
// (neighbour list plus per-node offsets) - confirm in the implementation.
618  std::vector<Index> j;
619  std::vector<Index> p;
620  graph();
621  size_t num_neighbors(Index node);
// Returns a raw pointer; presumably into `j` - confirm.
622  Index *neighbors(Index node);
623  bool empty();
624  size_t num_nodes();
625  void print();
// Per-node boolean workspace.
628  std::vector<bool> mark;
// Index maps; by name, from independent / dependent variables to operators.
630  std::vector<Index> inv2op;
632  std::vector<Index> dep2op;
634  std::vector<Index> rowcounts();
636  std::vector<Index> colcounts();
// Search from `start`; `visited` and `result` are in/out arguments.
646  void bfs(const std::vector<Index> &start, std::vector<bool> &visited,
647  std::vector<Index> &result);
// Two search overloads; `start` is taken by non-const reference and both
// sorting options default to true.
660  void search(std::vector<Index> &start, bool sort_input = true,
661  bool sort_output = true);
669  void search(std::vector<Index> &start, std::vector<bool> &visited,
670  bool sort_input = true, bool sort_output = true);
676  std::vector<Index> boundary(const std::vector<Index> &subgraph);
// Construct from a node count and an edge list of (Index, Index) pairs.
681  graph(size_t num_nodes, const std::vector<IndexPair> &edges);
682 };
683 
namespace {
/** \brief Factory for operator objects, selected by the `dynamic` flag. */
template <class CompleteOperator, bool dynamic>
struct constructOperator {};

/** \brief Static case: one shared instance, created on first use and
    returned by every call. */
template <class CompleteOperator>
struct constructOperator<CompleteOperator, false> {
  CompleteOperator *operator()() {
    static CompleteOperator *shared_instance = new CompleteOperator();
    return shared_instance;
  }
};

/** \brief Dynamic case: every call allocates a new instance with `new`,
    forwarding zero to four constructor arguments by const reference. */
template <class CompleteOperator>
struct constructOperator<CompleteOperator, true> {
  CompleteOperator *operator()() { return new CompleteOperator(); }

  template <class T1>
  CompleteOperator *operator()(const T1 &x1) {
    return new CompleteOperator(x1);
  }

  template <class T1, class T2>
  CompleteOperator *operator()(const T1 &x1, const T2 &x2) {
    return new CompleteOperator(x1, x2);
  }

  template <class T1, class T2, class T3>
  CompleteOperator *operator()(const T1 &x1, const T2 &x2, const T3 &x3) {
    return new CompleteOperator(x1, x2, x3);
  }

  template <class T1, class T2, class T3, class T4>
  CompleteOperator *operator()(const T1 &x1, const T2 &x2, const T3 &x3,
                               const T4 &x4) {
    return new CompleteOperator(x1, x2, x3, x4);
  }
};
}  // namespace
727 
/** \brief Boolean properties of an operator packed into one integer.

    Bit number `f` of `code` is set when the operator has flag `f`.
*/
struct op_info {
  /** \brief Integer type holding the packed flags. */
  typedef int IntRep;
  /** \brief The packed flags. */
  IntRep code;
  /** \brief Flag names; each enumerator is a bit position in `code`.

      NOTE(review): every enumerator except `op_flag_count` was missing
      from this listing. The names are restored from their use in
      `get_flags()`; their order is assumed to follow that function —
      confirm against the original header.
  */
  enum op_flag {
    dynamic,
    smart_pointer,
    is_linear,
    is_constant,
    independent_variable,
    dependent_variable,
    allow_remap,
    elimination_protected,
    updating,
    /** \brief Number of flags (keep last). */
    op_flag_count
  };
  /** \brief Pack the static flag members of `op` into an `IntRep`.

      Each flag member is multiplied by its bit mask, so a `false` flag
      contributes zero.
  */
  template <class T>
  IntRep get_flags(T op) {
    return

        (op.dynamic * (1 << dynamic)) |
        (op.smart_pointer * (1 << smart_pointer)) |
        (op.is_linear * (1 << is_linear)) |
        (op.is_constant * (1 << is_constant)) |
        (op.independent_variable * (1 << independent_variable)) |
        (op.dependent_variable * (1 << dependent_variable)) |
        (op.allow_remap * (1 << allow_remap)) |
        (op.elimination_protected * (1 << elimination_protected)) |
        (op.updating * (1 << updating));
  }
  op_info();
  op_info(op_flag f);

  /** \brief Collect the flags of an operator object. */
  template <class T>
  op_info(T op) : code(get_flags(op)) {}
  /** \brief Query a single flag (defined out of line). */
  bool test(op_flag f) const;
  op_info &operator|=(const op_info &other);
  op_info &operator&=(const op_info &other);
};
784 
797 struct global {
798  struct ad_plain;
799  struct ad_aug;
800  typedef TMBAD_REPLAY_TYPE Replay;
801  struct ad_segment;
802  struct print_config;
// Abstract operator interface. Every member is pure virtual except
// op_name(), which has a default, and the virtual destructor.
// forward / reverse / forward_incr / reverse_decr each come in four
// overloads, one per argument value type: Scalar, bool, Replay and Writer.
811  struct OperatorPure {
// Advance / rewind an (input, output) pointer pair past this operator.
814  virtual void increment(IndexPair &ptr) = 0;
817  virtual void decrement(IndexPair &ptr) = 0;
// Numeric passes.
819  virtual void forward(ForwardArgs<Scalar> &args) = 0;
821  virtual void reverse(ReverseArgs<Scalar> &args) = 0;
823  virtual void forward_incr(ForwardArgs<Scalar> &args) = 0;
825  virtual void reverse_decr(ReverseArgs<Scalar> &args) = 0;
// Number of inputs / outputs of this operator.
827  virtual Index input_size() = 0;
829  virtual Index output_size() = 0;
// Passes over boolean marks.
834  virtual void forward(ForwardArgs<bool> &args) = 0;
839  virtual void reverse(ReverseArgs<bool> &args) = 0;
841  virtual void forward_incr(ForwardArgs<bool> &args) = 0;
843  virtual void reverse_decr(ReverseArgs<bool> &args) = 0;
845  virtual void forward_incr_mark_dense(ForwardArgs<bool> &args) = 0;
// Report dependencies into `dep`.
859  virtual void dependencies(Args<> &args, Dependencies &dep) = 0;
863  virtual void dependencies_updating(Args<> &args, Dependencies &dep) = 0;
// Replay passes.
865  virtual void forward(ForwardArgs<Replay> &args) = 0;
867  virtual void reverse(ReverseArgs<Replay> &args) = 0;
869  virtual void forward_incr(ForwardArgs<Replay> &args) = 0;
871  virtual void reverse_decr(ReverseArgs<Replay> &args) = 0;
// Source-writer passes.
873  virtual void forward(ForwardArgs<Writer> &args) = 0;
875  virtual void reverse(ReverseArgs<Writer> &args) = 0;
877  virtual void forward_incr(ForwardArgs<Writer> &args) = 0;
879  virtual void reverse_decr(ReverseArgs<Writer> &args) = 0;
// Operator name; "NoName" unless overridden.
881  virtual const char *op_name() { return "NoName"; }
// Fusing with itself / with another operator; both return an operator
// pointer.
885  virtual OperatorPure *self_fuse() = 0;
889  virtual OperatorPure *other_fuse(OperatorPure *other) = 0;
891  virtual OperatorPure *copy() = 0;
893  virtual void deallocate() = 0;
895  virtual op_info info() = 0;
897  virtual void *operator_data() = 0;
902  virtual void *identifier() = 0;
904  virtual void print(print_config cfg) = 0;
907  virtual void *incomplete() = 0;
908  virtual ~OperatorPure() {}
909  };
910 
// Vector of operator pointers with its own copy constructor, assignment
// operator, destructor, push_back() and clear(), all defined out of line.
// NOTE(review): the user-defined copy/destroy members suggest the stack
// manages the lifetime of some of its operators - confirm in the
// implementation.
917  struct operation_stack : std::vector<OperatorPure *> {
918  typedef std::vector<OperatorPure *> Base;
922  operation_stack();
924  operation_stack(const operation_stack &other);
// Declared here, so it hides Base::push_back.
927  void push_back(OperatorPure *x);
929  operation_stack &operator=(const operation_stack &other);
930  ~operation_stack();
932  void clear();
933  void copy_from(const operation_stack &other);
934  };
935 
940  std::vector<Scalar> values;
943  std::vector<Scalar> derivs;
945  IndexVector inputs;
948  std::vector<Index> inv_index;
951  std::vector<Index> dep_index;
952 
953  mutable std::vector<IndexPair> subgraph_ptr;
954  std::vector<Index> subgraph_seq;
956  void (*forward_compiled)(Scalar *);
958  void (*reverse_compiled)(Scalar *, Scalar *);
959 
960  global();
963  void clear();
964 
980  void shrink_to_fit(double tol = .9);
981 
985  void clear_deriv(Position start = Position(0, 0, 0));
986 
988  Scalar &value_inv(Index i);
990  Scalar &deriv_inv(Index i);
992  Scalar &value_dep(Index i);
994  Scalar &deriv_dep(Index i);
996  Position begin();
998  Position end();
999 
// Node filter passed by the unfiltered forward_loop / reverse_loop
// overloads. operator[] is defined out of line; presumably it returns true
// for every index - confirm.
1001  struct no_filter {
1002  CONSTEXPR bool operator[](size_t i) const;
1003  };
1009  template <class ForwardArgs, class NodeFilter>
1010  void forward_loop(ForwardArgs &args, size_t begin,
1011  const NodeFilter &node_filter) const {
1012  for (size_t i = begin; i < opstack.size(); i++) {
1013  if (node_filter[i])
1014  opstack[i]->forward_incr(args);
1015  else
1016  opstack[i]->increment(args.ptr);
1017  }
1018  }
1020  template <class ForwardArgs>
1021  void forward_loop(ForwardArgs &args, size_t begin = 0) const {
1022  forward_loop(args, begin, no_filter());
1023  }
1028  template <class ReverseArgs, class NodeFilter>
1029  void reverse_loop(ReverseArgs &args, size_t begin,
1030  const NodeFilter &node_filter) const {
1031  for (size_t i = opstack.size(); i > begin;) {
1032  i--;
1033  if (node_filter[i])
1034  opstack[i]->reverse_decr(args);
1035  else
1036  opstack[i]->decrement(args.ptr);
1037  }
1038  }
1040  template <class ReverseArgs>
1041  void reverse_loop(ReverseArgs &args, size_t begin = 0) const {
1042  reverse_loop(args, begin, no_filter());
1043  }
1045  template <class ForwardArgs>
1047  subgraph_cache_ptr();
1048  for (size_t j = 0; j < subgraph_seq.size(); j++) {
1049  Index i = subgraph_seq[j];
1050  args.ptr = subgraph_ptr[i];
1051  opstack[i]->forward(args);
1052  }
1053  }
1055  template <class ReverseArgs>
1057  subgraph_cache_ptr();
1058  for (size_t j = subgraph_seq.size(); j > 0;) {
1059  j--;
1060  Index i = subgraph_seq[j];
1061  args.ptr = subgraph_ptr[i];
1062  opstack[i]->reverse(args);
1063  }
1064  }
1075  template <class Vector>
1077  typename Vector::value_type value =
1078  typename Vector::value_type(0)) const {
1079  if (array.size() != values.size()) {
1080  array.resize(values.size());
1081  std::fill(array.begin(), array.end(), value);
1082  return;
1083  }
1084  subgraph_cache_ptr();
1085  for (size_t j = 0; j < subgraph_seq.size(); j++) {
1086  Index i = subgraph_seq[j];
1087  size_t noutput = opstack[i]->output_size();
1088  for (size_t k = 0; k < noutput; k++)
1089  array[subgraph_ptr[i].second + k] = value;
1090  }
1091  }
1092 
1097  void forward(Position start = Position(0, 0, 0));
1105  void reverse(Position start = Position(0, 0, 0));
1107  void forward_sub();
1109  void reverse_sub();
1110 
1112  void forward(std::vector<bool> &marks);
1114  void reverse(std::vector<bool> &marks);
1119  void forward_sub(std::vector<bool> &marks,
1120  const std::vector<bool> &node_filter = std::vector<bool>());
1125  void reverse_sub(std::vector<bool> &marks,
1126  const std::vector<bool> &node_filter = std::vector<bool>());
1135  void forward_dense(std::vector<bool> &marks);
1136 
1137  intervals<Index> updating_intervals() const;
1138 
1139  intervals<Index> updating_intervals_sub() const;
1140 
// Helper that replays one operation sequence (`orig`, read only) into
// another (`target`). All member functions are defined out of line.
1141  struct replay {
// Replay-typed counterparts of the value and derivative arrays.
1143  std::vector<Replay> values;
1146  std::vector<Replay> derivs;
// Source of the replay; held by const reference.
1148  const global &orig;
// Destination of the replay; held by reference.
1150  global &target;
1152  global *parent_glob;
// Element access for independent (inv) / dependent (dep) variables.
1154  Replay &value_inv(Index i);
1156  Replay &deriv_inv(Index i);
1158  Replay &value_dep(Index i);
1160  Replay &deriv_dep(Index i);
1164  replay(const global &orig, global &target);
// start() / stop() bracket a replay session.
1173  void start();
1178  void stop();
1180  void add_updatable_derivs(const intervals<Index> &I);
1182  void clear_deriv();
// Forward replay; the default node filter is an empty vector.
1189  void forward(bool inv_tags = true, bool dep_tags = true,
1190  Position start = Position(0, 0, 0),
1191  const std::vector<bool> &node_filter = std::vector<bool>());
// Reverse replay; note that the tag defaults differ from forward().
1199  void reverse(bool dep_tags = true, bool inv_tags = false,
1200  Position start = Position(0, 0, 0),
1201  const std::vector<bool> &node_filter = std::vector<bool>());
// Subgraph variants.
1203  void forward_sub();
1205  void reverse_sub();
1207  void clear_deriv_sub();
1208  };
1209 
1214  void forward_replay(bool inv_tags = true, bool dep_tags = true);
1215 
1221  void subgraph_cache_ptr() const;
1229  void set_subgraph(const std::vector<bool> &marks, bool append = false);
1231  void mark_subgraph(std::vector<bool> &marks);
1233  void unmark_subgraph(std::vector<bool> &marks);
1235  void subgraph_trivial();
1241  void clear_deriv_sub();
1274  global extract_sub(std::vector<Index> &var_remap, global new_glob = global());
1279  void extract_sub_inplace(std::vector<bool> marks);
1283  global extract_sub();
1284 
1293  std::vector<Index> var2op();
1299  std::vector<bool> var2op(const std::vector<bool> &values);
1301  std::vector<Index> op2var(const std::vector<Index> &seq);
1303  std::vector<bool> op2var(const std::vector<bool> &seq_mark);
1312  std::vector<Index> op2idx(const std::vector<Index> &var_subset,
1313  Index NA = (Index)-1);
1315  std::vector<bool> mark_space(size_t n, const std::vector<Index> ind);
1317  std::vector<bool> inv_marks();
1319  std::vector<bool> dep_marks();
1321  std::vector<bool> subgraph_marks();
1322 
// Functor that appends to an edge list. It holds references to an external
// counter `i`, a variable filter `keep_var`, a variable-to-operator map
// `var2op` and the output list `edges`; `op_marks` and `pos` are its own
// state. All members are defined out of line.
1323  struct append_edges {
1324  size_t &i;
1325  const std::vector<bool> &keep_var;
1326  std::vector<Index> &var2op;
1327  std::vector<IndexPair> &edges;
1328 
1329  std::vector<bool> op_marks;
1330  size_t pos;
1331  append_edges(size_t &i, size_t num_nodes, const std::vector<bool> &keep_var,
1332  std::vector<Index> &var2op, std::vector<IndexPair> &edges);
// Called once per dependency index.
1333  void operator()(Index dep_j);
1334 
// start_iteration() / end_iteration() presumably bracket the calls made for
// one node - confirm in the implementation.
1335  void start_iteration();
1336 
1337  void end_iteration();
1338  };
1347  graph build_graph(bool transpose, const std::vector<bool> &keep_var);
1351  graph forward_graph(std::vector<bool> keep_var = std::vector<bool>(0));
1355  graph reverse_graph(std::vector<bool> keep_var = std::vector<bool>(0));
1356 
1361  bool identical(const global &other) const;
1362 
1364  template <class T>
1365  void hash(hash_t &h, T x) const {
1366  static const size_t n =
1367  (sizeof(T) / sizeof(hash_t)) + (sizeof(T) % sizeof(hash_t) != 0);
1368  hash_t buffer[n];
1369  std::fill(buffer, buffer + n, 0);
1370  for (size_t i = 0; i < sizeof(x); i++)
1371  ((char *)buffer)[i] = ((char *)&x)[i];
1372  hash_t A = 54059;
1373  hash_t B = 76963;
1374  for (size_t i = 0; i < n; i++) h = (A * h) ^ (B * buffer[i]);
1375  }
1376 
1385  hash_t hash() const;
1386 
// Options accepted by hash_sweep(hash_config).
// NOTE(review): the listing line numbers jump from 1388 to 1398, so further
// members may have been dropped from this listing - confirm against the
// original header.
1388  struct hash_config {
1398  bool reduce;
// Vector of indices; by name, seeds for independent variables - confirm.
1402  std::vector<Index> inv_seed;
1403  };
1404 
1459  std::vector<hash_t> hash_sweep(hash_config cfg) const;
1461  std::vector<hash_t> hash_sweep(bool weak = true) const;
1462 
1476  void eliminate();
1477 
// Options passed to print(print_config): two strings (`prefix`, `mark`) and
// an integer `depth`. The constructor is defined out of line.
1479  struct print_config {
1480  std::string prefix, mark;
1481  int depth;
1482  print_config();
1483  };
1485  void print(print_config cfg);
1487  void print();
1488 
1490  template <int ninput_, int noutput_ = 1>
1491  struct Operator {
1493  static const bool dynamic = false;
1495  static const int ninput = ninput_;
1497  static const int noutput = noutput_;
1499  static const int independent_variable = false;
1501  static const int dependent_variable = false;
1503  static const bool have_input_size_output_size = false;
1505  static const bool have_increment_decrement = false;
1507  static const bool have_forward_reverse = true;
1509  static const bool have_forward_incr_reverse_decr = false;
1511  static const bool have_forward_mark_reverse_mark = false;
1513  static const bool have_dependencies = false;
1519  static const bool allow_remap = true;
1530  static const bool implicit_dependencies = false;
1532  static const bool add_static_identifier = false;
1535  static const bool add_forward_replay_copy = false;
1538  static const bool have_eval = false;
1540  static const int max_fuse_depth = 2;
1542  static const bool is_linear = false;
1544  static const bool is_constant = false;
1546  static const bool smart_pointer = false;
1548  static const bool elimination_protected = false;
1574  static const bool updating = false;
1577  void dependencies_updating(Args<> &args, Dependencies &dep) const {}
1580  return NULL;
1581  }
1583  void *operator_data() { return NULL; }
1585  void print(print_config cfg) {}
1586  };
1589  template <int ninput, int noutput>
1590  struct DynamicOperator : Operator<ninput, noutput> {
1592  static const bool dynamic = true;
1594  static const int max_fuse_depth = 0;
1595  };
1598  template <int ninput>
1599  struct DynamicOutputOperator : Operator<ninput, -1> {
1601  static const bool dynamic = true;
1603  static const int max_fuse_depth = 0;
1604  Index noutput;
1605  };
1606  template <int noutput = 1>
1607  struct DynamicInputOperator : Operator<-1, noutput> {
1609  static const bool dynamic = true;
1611  static const int max_fuse_depth = 0;
1612  Index ninput;
1613  };
1614  struct DynamicInputOutputOperator : Operator<-1, -1> {
1616  static const bool dynamic = true;
1618  static const int max_fuse_depth = 0;
1619  Index ninput_, noutput_;
1620  DynamicInputOutputOperator(Index ninput, Index noutput);
1621  Index input_size() const;
1622  Index output_size() const;
1623  static const bool have_input_size_output_size = true;
1624  };
1625  struct UniqueDynamicOperator : Operator<-1, -1> {
1627  static const bool dynamic = true;
1629  static const int max_fuse_depth = 0;
1631  static const bool smart_pointer = false;
1634  static const bool have_input_size_output_size = true;
1635  };
1636  struct SharedDynamicOperator : UniqueDynamicOperator {
1638  static const bool smart_pointer = true;
1639  };
1640 
1643  template <class OperatorBase>
1644  struct AddInputSizeOutputSize : OperatorBase {
1645  INHERIT_CTOR(AddInputSizeOutputSize, OperatorBase)
1646  Index input_size() const { return this->ninput; }
1647  Index output_size() const { return this->noutput; }
1648  static const bool have_input_size_output_size = true;
1649  };
1650 
1653  template <class OperatorBase>
1654  struct AddIncrementDecrement : OperatorBase {
1655  INHERIT_CTOR(AddIncrementDecrement, OperatorBase)
1656  void increment(IndexPair &ptr) {
1657  ptr.first += this->input_size();
1658  ptr.second += this->output_size();
1659  }
1660  void decrement(IndexPair &ptr) {
1661  ptr.first -= this->input_size();
1662  ptr.second -= this->output_size();
1663  }
1664  static const bool have_increment_decrement = true;
1665  };
1666 
1670  template <class OperatorBase>
1671  struct AddForwardReverse : OperatorBase {
1672  INHERIT_CTOR(AddForwardReverse, OperatorBase)
1673 
1674  template <class Type>
1675  void forward(ForwardArgs<Type> &args) {
1676  ForwardArgs<Type> args_cpy(args);
1677  OperatorBase::forward_incr(args_cpy);
1678  }
1679  template <class Type>
1680  void reverse(ReverseArgs<Type> &args) {
1681  ReverseArgs<Type> args_cpy(args);
1682  OperatorBase::increment(args_cpy.ptr);
1683  OperatorBase::reverse_decr(args_cpy);
1684  }
1685  static const bool have_forward_reverse = true;
1686  };
1687 
1691  template <class OperatorBase>
1692  struct AddForwardIncrReverseDecr : OperatorBase {
1693  INHERIT_CTOR(AddForwardIncrReverseDecr, OperatorBase)
1694 
1695  template <class Type>
1696  void forward_incr(ForwardArgs<Type> &args) {
1697  OperatorBase::forward(args);
1698  OperatorBase::increment(args.ptr);
1699  }
1700 
1701  template <class Type>
1702  void reverse_decr(ReverseArgs<Type> &args) {
1703  OperatorBase::decrement(args.ptr);
1704  OperatorBase::reverse(args);
1705  }
1706  static const bool have_forward_incr_reverse_decr = true;
1707  };
1708 
1711  template <class OperatorBase>
1712  struct AddForwardMarkReverseMark : OperatorBase {
1713  INHERIT_CTOR(AddForwardMarkReverseMark, OperatorBase)
1714 
1715  template <class Type>
1716  void forward(ForwardArgs<Type> &args) {
1717  OperatorBase::forward(args);
1718  }
1719  template <class Type>
1720  void reverse(ReverseArgs<Type> &args) {
1721  OperatorBase::reverse(args);
1722  }
1723 
1724  void forward(ForwardArgs<bool> &args) { args.mark_dense(*this); }
1725  void reverse(ReverseArgs<bool> &args) { args.mark_dense(*this); }
1726  static const bool have_forward_mark_reverse_mark = true;
1727  };
1728 
1731  template <class OperatorBase>
1732  struct AddDependencies : OperatorBase {
1733  INHERIT_CTOR(AddDependencies, OperatorBase)
1734  void dependencies(Args<> &args, Dependencies &dep) const {
1735  Index ninput_ = this->input_size();
1736  for (Index j = 0; j < ninput_; j++) dep.push_back(args.input(j));
1737  }
1738  static const bool have_dependencies = true;
1739  };
1740 
1743  template <class OperatorBase, int ninput>
1744  struct AddForwardFromEval : OperatorBase {};
1746  template <class OperatorBase>
1747  struct AddForwardFromEval<OperatorBase, 1> : OperatorBase {
1748  INHERIT_CTOR(AddForwardFromEval, OperatorBase)
1749  template <class Type>
1750  void forward(ForwardArgs<Type> &args) {
1751  args.y(0) = this->eval(args.x(0));
1752  }
1753  };
1755  template <class OperatorBase>
1756  struct AddForwardFromEval<OperatorBase, 2> : OperatorBase {
1757  INHERIT_CTOR(AddForwardFromEval, OperatorBase)
1758  template <class Type>
1759  void forward(ForwardArgs<Type> &args) {
1760  args.y(0) = this->eval(args.x(0), args.x(1));
1761  }
1762  };
1763 
/** Optional reference counter.
 *
 *  Primary template (`flag == false`): counting is disabled; `increment`
 *  and `decrement` do nothing and the count always reads zero.
 *  (The `struct` header line was missing from the listing and has been
 *  restored to match the specialisation below.)
 */
template <bool flag, class dummy>
struct ReferenceCounter {
  void increment() {}
  void decrement() {}
  /** Current count - always zero when counting is disabled. */
  size_t operator()() const { return 0; }
};

/** Specialisation for `flag == true`: a plain, non-atomic counter that
 *  starts at zero. `decrement()` does not guard against going below zero. */
template <class dummy>
struct ReferenceCounter<true, dummy> {
  size_t counter;
  ReferenceCounter() : counter(0) {}
  void increment() { counter++; }
  void decrement() { counter--; }
  /** Current count. */
  size_t operator()() const { return counter; }
};
1779 
/** Compile-time type selection: `if_else<flag, Yes, No>::type` is `Yes`
 *  when `flag` is true and `No` otherwise. The primary template is left
 *  empty on purpose; both values of `flag` are covered by the partial
 *  specialisations. */
template <bool flag, class Yes, class No>
struct if_else {};

/** `flag == false` selects `No`. */
template <class Yes, class No>
struct if_else<false, Yes, No> {
  typedef No type;
};

/** `flag == true` selects `Yes`. */
template <class Yes, class No>
struct if_else<true, Yes, No> {
  typedef Yes type;
};
1791 
1793  template <class OperatorBase>
1794  struct CPL {
1795  static const bool test1 = !OperatorBase::have_eval;
1797  typedef typename if_else<
1798  test1, OperatorBase,
1800 
1801  static const bool test2 = Result1::have_input_size_output_size;
1803  typedef
1806 
1807  static const bool test3 = !Result2::have_dependencies;
1809  typedef typename if_else<test3, AddDependencies<Result2>, Result2>::type
1811 
1812  static const bool test4 = Result3::have_increment_decrement;
1814  typedef
1817 
1818  static const bool test5 = Result4::have_forward_mark_reverse_mark;
1820  typedef typename if_else<test5, Result4,
1822 
1823  static const bool test6 = Result5::have_forward_reverse &&
1824  !Result5::have_forward_incr_reverse_decr;
1827  Result5>::type Result6;
1828 
1829  static const bool test7 = Result6::have_forward_incr_reverse_decr &&
1830  !Result6::have_forward_reverse;
1832  typedef typename if_else<test7, AddForwardReverse<Result6>, Result6>::type
1834 
1835  typedef Result7 type;
1836  };
1837 
1839  template <class Operator1, class Operator2>
1840  struct Fused : Operator<Operator1::ninput + Operator2::ninput,
1841  Operator1::noutput + Operator2::noutput> {
1842  typename CPL<Operator1>::type Op1;
1843  typename CPL<Operator2>::type Op2;
1845  static const int independent_variable =
1846  Operator1::independent_variable && Operator2::independent_variable;
1848  static const int dependent_variable =
1849  Operator1::dependent_variable && Operator2::dependent_variable;
1851  static const int max_fuse_depth =
1852  (Operator1::max_fuse_depth < Operator2::max_fuse_depth
1853  ? Operator1::max_fuse_depth - 1
1854  : Operator2::max_fuse_depth - 1);
1856  static const bool is_linear = Operator1::is_linear && Operator2::is_linear;
1857  template <class Type>
1858  void forward_incr(ForwardArgs<Type> &args) {
1859  Op1.forward_incr(args);
1860  Op2.forward_incr(args);
1861  }
1862  template <class Type>
1863  void reverse_decr(ReverseArgs<Type> &args) {
1864  Op2.reverse_decr(args);
1865  Op1.reverse_decr(args);
1866  }
1868  static const bool have_forward_incr_reverse_decr = true;
1870  static const bool have_forward_reverse = false;
1871  const char *op_name() { return "Fused"; }
1872  };
// Rep<Operator1>: a dynamic operator that stands for `n` consecutive
// applications of Operator1. Its sweeps call the wrapped operator's
// forward_incr / reverse_decr `n` times, and its input/output sizes are
// `n` times those of Operator1.
1881  template <class Operator1>
1882  struct Rep : DynamicOperator<-1, -1> {
// completed form of the repeated operator
1883  typename CPL<Operator1>::type Op;
// traits below are inherited unchanged from Operator1
1885  static const int independent_variable = Operator1::independent_variable;
1887  static const int dependent_variable = Operator1::dependent_variable;
1889  static const bool is_linear = Operator1::is_linear;
// number of repetitions
1890  Index n;
1891  Rep(Index n) : n(n) {}
1892  Index input_size() const { return Operator1::ninput * n; }
1893  Index output_size() const { return Operator1::noutput * n; }
1895  static const bool have_input_size_output_size = true;
1896  template <class Type>
1897  void forward_incr(ForwardArgs<Type> &args) {
1898  for (size_t i = 0; i < (size_t)n; i++) Op.forward_incr(args);
1899  }
1900  template <class Type>
1901  void reverse_decr(ReverseArgs<Type> &args) {
1902  for (size_t i = 0; i < (size_t)n; i++) Op.reverse_decr(args);
1903  }
// only the incrementing sweeps are hand written; forward()/reverse() are
// left to be derived (have_forward_reverse is false)
1905  static const bool have_forward_incr_reverse_decr = true;
1907  static const bool have_forward_reverse = false;
// NOTE(review): the declaration line of the member whose body follows is
// not present in this listing (it returns an operator pointer or NULL).
// The body starts with TMBAD_ASSERT(false). After that it looks at the
// last n*k entries of the tape's input vector (k = inputs per repetition),
// requires the element-wise difference between consecutive repetitions to
// be the same for all of them (returning NULL otherwise), drops all but
// the first repetition's inputs and returns a RepCompress<Operator1>
// built from `n` and that increment.
1914  TMBAD_ASSERT(false);
1915  std::vector<Index> &inputs = get_glob()->inputs;
1916  size_t k = Op.input_size();
1917  size_t start = inputs.size() - k * n;
1918  std::valarray<Index> increment(k);
1919  if (k > 0) {
1920  for (size_t i = 0; i < (size_t)n - 1; i++) {
1921  std::valarray<Index> v1(&inputs[start + i * k], k);
1922  std::valarray<Index> v2(&inputs[start + (i + 1) * k], k);
1923  if (i == 0) {
1924  increment = v2 - v1;
1925  } else {
// (a == b).min() on a valarray<bool> is true only if all elements match
1926  bool ok = (increment == (v2 - v1)).min();
1927  if (!ok) return NULL;
1928  }
1929  }
1930  }
1931  
// inputs of repetitions 2..n are removed from the tape
1932  size_t reduction = (n - 1) * k;
1933  inputs.resize(inputs.size() - reduction);
1934  return get_glob()->getOperator<RepCompress<Operator1> >(n, increment);
1935  }
// Fusion hook: if `other` is the operator object returned by
// getOperator<Operator1>(), absorb it by bumping the repetition count and
// return `self`; otherwise return NULL.
1936  OperatorPure *other_fuse(OperatorPure *self, OperatorPure *other) {
1937  OperatorPure *op1 = get_glob()->getOperator<Operator1>();
1938  if (op1 == other) {
1939  this->n++;
1940  return self;
1941  }
1942  return NULL;
1943  }
1944  const char *op_name() { return "Rep"; }
1945  };
1956  template <class Operator1>
1957  struct RepCompress : DynamicOperator<-1, -1> {
1959  static const int independent_variable = Operator1::independent_variable;
1961  static const int dependent_variable = Operator1::dependent_variable;
1963  static const bool is_linear = Operator1::is_linear;
1964  typename CPL<Operator1>::type Op;
1965  Index n;
1966 
1967  std::valarray<Index> increment_pattern;
1968  RepCompress(Index n, std::valarray<Index> v) : n(n), increment_pattern(v) {}
1969  Index input_size() const { return Operator1::ninput; }
1970  Index output_size() const { return Operator1::noutput * n; }
1972  static const bool have_input_size_output_size = true;
1974  template <class Type>
1976  std::valarray<Index> inputs(input_size());
1977  for (size_t i = 0; i < inputs.size(); i++) inputs[i] = args.input(i);
1978  ForwardArgs<Type> args_cpy = args;
1979  args_cpy.inputs = &inputs[0];
1980  args_cpy.ptr.first = 0;
1981  for (size_t i = 0; i < (size_t)n; i++) {
1982  Op.forward(args_cpy);
1983  inputs += this->increment_pattern;
1984  args_cpy.ptr.second += Op.output_size();
1985  }
1986  }
1988  template <class Type>
1990  std::valarray<Index> inputs(input_size());
1991  for (size_t i = 0; i < inputs.size(); i++) inputs[i] = args.input(i);
1992  inputs += n * this->increment_pattern;
1993  ReverseArgs<Type> args_cpy = args;
1994  args_cpy.inputs = &inputs[0];
1995  args_cpy.ptr.first = 0;
1996  args_cpy.ptr.second += n * Op.output_size();
1997  for (size_t i = 0; i < (size_t)n; i++) {
1998  inputs -= this->increment_pattern;
1999  args_cpy.ptr.second -= Op.output_size();
2000  Op.reverse(args_cpy);
2001  }
2002  }
2004  void dependencies(Args<> &args, Dependencies &dep) const {
2005  std::valarray<Index> inputs(input_size());
2006  for (size_t i = 0; i < inputs.size(); i++) inputs[i] = args.input(i);
2007  for (size_t i = 0; i < (size_t)n; i++) {
2008  dep.insert(dep.end(), &inputs[0], &inputs[0] + inputs.size());
2009  inputs += this->increment_pattern;
2010  }
2011  }
2012  static const bool have_dependencies = true;
2013  void forward(ForwardArgs<Writer> &args) {
2014  std::valarray<Index> inputs(Op.input_size());
2015  for (size_t i = 0; i < (size_t)Op.input_size(); i++)
2016  inputs[i] = args.input(i);
2017  std::valarray<Index> outputs(Op.output_size());
2018  for (size_t i = 0; i < (size_t)Op.output_size(); i++)
2019  outputs[i] = args.output(i);
2020  Writer w;
2021  int ninp = Op.input_size();
2022  int nout = Op.output_size();
2023 
2024  w << "for (int count = 0, "
2025  << "i[" << ninp << "]=" << inputs << ", "
2026  << "di[" << ninp << "]=" << increment_pattern << ", "
2027  << "o[" << nout << "]=" << outputs << "; "
2028  << "count < " << n << "; count++) {\n";
2029 
2030  w << " ";
2031  ForwardArgs<Writer> args_cpy = args;
2032  args_cpy.set_indirect();
2033  Op.forward(args_cpy);
2034  w << "\n";
2035 
2036  w << " ";
2037  w << "for (int k=0; k<" << ninp << "; k++) i[k] += di[k];\n";
2038  w << " ";
2039  w << "for (int k=0; k<" << nout << "; k++) o[k] += " << nout << ";\n";
2040 
2041  w << " ";
2042  w << "}";
2043  }
2044  void reverse(ReverseArgs<Writer> &args) {
2045  std::valarray<Index> inputs(Op.input_size());
2046  for (size_t i = 0; i < (size_t)Op.input_size(); i++)
2047  inputs[i] = args.input(i);
2048  inputs += n * increment_pattern;
2049  std::valarray<Index> outputs(Op.output_size());
2050  for (size_t i = 0; i < (size_t)Op.output_size(); i++)
2051  outputs[i] = args.output(i);
2052  outputs += n * Op.output_size();
2053  Writer w;
2054  int ninp = Op.input_size();
2055  int nout = Op.output_size();
2056 
2057  w << "for (int count = 0, "
2058  << "i[" << ninp << "]=" << inputs << ", "
2059  << "di[" << ninp << "]=" << increment_pattern << ", "
2060  << "o[" << nout << "]=" << outputs << "; "
2061  << "count < " << n << "; count++) {\n";
2062 
2063  w << " ";
2064  w << "for (int k=0; k<" << ninp << "; k++) i[k] -= di[k];\n";
2065  w << " ";
2066  w << "for (int k=0; k<" << nout << "; k++) o[k] -= " << nout << ";\n";
2067 
2068  w << " ";
2069  ReverseArgs<Writer> args_cpy = args;
2070  args_cpy.set_indirect();
2071  Op.reverse(args_cpy);
2072  w << "\n";
2073 
2074  w << " ";
2075  w << "}";
2076  }
2078  static const bool have_forward_incr_reverse_decr = false;
2080  static const bool have_forward_reverse = true;
2082  static const bool have_forward_mark_reverse_mark = true;
2083  const char *op_name() { return "CRep"; }
2084 
2085  struct operator_data_t {
2086  OperatorPure *Op;
2087  Index n;
2088  std::valarray<Index> ip;
2089  operator_data_t(const RepCompress &x)
2090  : Op(get_glob()->getOperator<Operator1>()),
2091  n(x.n),
2092  ip(x.increment_pattern) {}
2093  ~operator_data_t() { Op->deallocate(); }
2094  bool operator==(const operator_data_t &other) {
2095  return (Op == other.Op) && (ip.size() == other.ip.size()) &&
2096  ((ip - other.ip).min() == 0);
2097  }
2098  };
2099  void *operator_data() { return new operator_data_t(*this); }
2100  OperatorPure *other_fuse(OperatorPure *self, OperatorPure *other) {
2101  if (this->op_name() == other->op_name()) {
2102  operator_data_t *p1 =
2103  static_cast<operator_data_t *>(self->operator_data());
2104  operator_data_t *p2 =
2105  static_cast<operator_data_t *>(other->operator_data());
2106  bool match = (*p1 == *p2);
2107  int other_n = p2->n;
2108  delete p1;
2109  delete p2;
2110  if (match) {
2111  std::vector<Index> &inputs = get_glob()->inputs;
2112  size_t reduction = increment_pattern.size();
2113  inputs.resize(inputs.size() - reduction);
2114  this->n += other_n;
2115  other->deallocate();
2116  return self;
2117  }
2118  }
2119  return NULL;
2120  }
2121  };
2122 
// Complete<OperatorBase>: wraps the completed operator type
// CPL<OperatorBase>::type in a member `Op` and forwards every sweep and
// query to it.
// NOTE(review): several declaration lines of this class are not present in
// this listing (including the `struct` header and wherever `ref_count` is
// declared); the affected spots are marked below.
2128  template <class OperatorBase>
2130  typename CPL<OperatorBase>::type Op;
2131  INHERIT_CTOR(Complete, Op)
2132  ~Complete() {}
// Numeric (Scalar) sweeps: plain delegation.
2133  void forward(ForwardArgs<Scalar> &args) { Op.forward(args); }
2134  void reverse(ReverseArgs<Scalar> &args) { Op.reverse(args); }
2135  void forward_incr(ForwardArgs<Scalar> &args) { Op.forward_incr(args); }
2136  void reverse_decr(ReverseArgs<Scalar> &args) { Op.reverse_decr(args); }
2137  
// NOTE(review): declaration line missing; `args` is passed to
// forward_replay_copy(ForwardArgs<Replay>&), so this is presumably the
// Replay forward sweep. With add_forward_replay_copy set, a copy of the
// operator is re-added to the tape instead of calling Op.forward.
2139  if (Op.add_forward_replay_copy)
2140  forward_replay_copy(args);
2141  else
2142  Op.forward(args);
2143  }
2144  void reverse(ReverseArgs<Replay> &args) { Op.reverse(args); }
// NOTE(review): declaration line missing; presumably the incrementing
// Replay forward sweep (same as above plus increment(args.ptr)).
2146  if (Op.add_forward_replay_copy) {
2147  forward_replay_copy(args);
2148  increment(args.ptr);
2149  } else
2150  Op.forward_incr(args);
2151  }
2152  void reverse_decr(ReverseArgs<Replay> &args) { Op.reverse_decr(args); }
2153  
// Marking (bool) sweeps: plain delegation.
2154  void forward(ForwardArgs<bool> &args) { Op.forward(args); }
2155  void reverse(ReverseArgs<bool> &args) { Op.reverse(args); }
2156  void forward_incr(ForwardArgs<bool> &args) { Op.forward_incr(args); }
2157  void reverse_decr(ReverseArgs<bool> &args) { Op.reverse_decr(args); }
// NOTE(review): declaration line missing; the body marks via
// args.mark_dense(Op) and then advances args.ptr past the operator.
2159  args.mark_dense(Op);
2160  Op.increment(args.ptr);
2161  };
2162  
// Source-writer sweeps: plain delegation.
2163  void forward(ForwardArgs<Writer> &args) { Op.forward(args); }
2164  void reverse(ReverseArgs<Writer> &args) { Op.reverse(args); }
2165  void forward_incr(ForwardArgs<Writer> &args) { Op.forward_incr(args); }
2166  void reverse_decr(ReverseArgs<Writer> &args) { Op.reverse_decr(args); }
// Call operators: copy this operator with `new`, require a zero reference
// count on the copy, increment it, and add the copy to the tape returned
// by get_glob() with inputs `x`. Only allowed for dynamic operators.
2171  std::vector<ad_plain> operator()(const std::vector<ad_plain> &x) {
2172  TMBAD_ASSERT2(OperatorBase::dynamic,
2173  "Stack to heap copy only allowed for dynamic operators");
2174  Complete *pOp = new Complete(*this);
2175  TMBAD_ASSERT2(pOp->ref_count() == 0, "Operator already on the heap");
2176  pOp->ref_count.increment();
2177  return get_glob()->add_to_stack<OperatorBase>(pOp, x);
2178  }
// Same as above for a single ad_segment argument.
2179  ad_segment operator()(const ad_segment &x) {
2180  TMBAD_ASSERT2(OperatorBase::dynamic,
2181  "Stack to heap copy only allowed for dynamic operators");
2182  Complete *pOp = new Complete(*this);
2183  TMBAD_ASSERT2(pOp->ref_count() == 0, "Operator already on the heap");
2184  pOp->ref_count.increment();
2185  return get_glob()->add_to_stack<OperatorBase>(pOp, x);
2186  }
// Same as above for two ad_segment arguments.
2187  ad_segment operator()(const ad_segment &x, const ad_segment &y) {
2188  TMBAD_ASSERT2(OperatorBase::dynamic,
2189  "Stack to heap copy only allowed for dynamic operators");
2190  Complete *pOp = new Complete(*this);
2191  TMBAD_ASSERT2(pOp->ref_count() == 0, "Operator already on the heap");
2192  pOp->ref_count.increment();
2193  return get_glob()->add_to_stack<OperatorBase>(pOp, x, y);
2194  }
// Convenience overload: converts the elements to ad_plain, calls the
// ad_plain overload and converts the result back to T.
2195  template <class T>
2196  std::vector<T> operator()(const std::vector<T> &x) {
2197  std::vector<ad_plain> x_(x.begin(), x.end());
2198  std::vector<ad_plain> y_ = (*this)(x_);
2199  std::vector<T> y(y_.begin(), y_.end());
2200  return y;
2201  }
// Replay helper: collect the Op.input_size() inputs from `args`, add
// this->copy() to the tape returned by get_glob() and write the resulting
// outputs back into `args`.
2202  void forward_replay_copy(ForwardArgs<Replay> &args) {
2203  std::vector<ad_plain> x(Op.input_size());
2204  for (size_t i = 0; i < x.size(); i++) x[i] = args.x(i);
2205  std::vector<ad_plain> y =
2206  get_glob()->add_to_stack<OperatorBase>(this->copy(), x);
2207  for (size_t i = 0; i < y.size(); i++) args.y(i) = y[i];
2208  }
// Queries below delegate to the wrapped operator.
2209  void dependencies(Args<> &args, Dependencies &dep) {
2210  Op.dependencies(args, dep);
2211  }
2212  void dependencies_updating(Args<> &args, Dependencies &dep) {
2213  Op.dependencies_updating(args, dep);
2214  }
2215  void increment(IndexPair &ptr) { Op.increment(ptr); }
2216  void decrement(IndexPair &ptr) { Op.decrement(ptr); }
2217  Index input_size() { return Op.input_size(); }
2218  Index output_size() { return Op.output_size(); }
2219  const char *op_name() { return Op.op_name(); }
2220  void print(print_config cfg) { Op.print(cfg); }
2221  
// SelfFuse: functor giving the operator that stands for this operator
// fused with itself - a Rep<Operator_> constructed with argument 2 -
// or NULL when the fuse depth is 0 (specialisation below).
2222  template <class Operator_, int depth>
2223  struct SelfFuse {
2224  typedef Rep<Operator_> type;
2225  OperatorPure *operator()() {
2226  return get_glob()->template getOperator<type>(2);
2227  }
2228  };
2229  template <class Operator_>
2230  struct SelfFuse<Operator_, 0> {
2231  OperatorPure *operator()() { return NULL; }
2232  };
// NOTE(review): declaration line missing; the body returns the SelfFuse
// result for OperatorBase::max_fuse_depth.
2234  return SelfFuse<OperatorBase, OperatorBase::max_fuse_depth>()();
2235  }
// NOTE(review): declaration line missing; the body forwards to
// Op.other_fuse(this, other).
2237  return Op.other_fuse(this, other);
2238  }
// NOTE(review): declaration line missing; this is presumably copy().
// smart_pointer operators: bump the reference count and share `this`;
// other dynamic operators: return a new heap copy; static operators:
// return `this`.
2241  if (Op.smart_pointer) {
2242  ref_count.increment();
2243  return this;
2244  } else if (Op.dynamic)
2245  return new Complete(*this);
2246  else
2247  return this;
2248  }
// Release one reference. Static operators are never deleted. For
// smart_pointer operators the object is deleted only when the count is
// not above 1; otherwise the count is decremented.
2249  void deallocate() {
2250  if (!Op.dynamic) return;
2251  if (Op.smart_pointer) {
2252  if (ref_count() > 1) {
2253  ref_count.decrement();
2254  return;
2255  }
2256  }
2257  delete this;
2258  }
// NOTE(review): declaration line missing; the body builds an op_info from
// Op and returns it.
2260  op_info info(Op);
2261  return info;
2262  }
// Identifier of this operator: with add_static_identifier a function-local
// static pointer (allocated once and never freed), otherwise the address
// of this object.
2263  void *identifier() {
2264  if (Op.add_static_identifier) {
2265  static void *id = new char();
2266  return id;
2267  } else
2268  return (void *)this;
2269  }
2270  void *operator_data() { return Op.operator_data(); }
// Address of the wrapped (not type-erased) operator.
2271  void *incomplete() { return &Op; }
2272  };
2273 
2274  template <class OperatorBase>
2275  Complete<OperatorBase> *getOperator() const {
2276  return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()();
2277  }
2278  template <class OperatorBase, class T1>
2279  Complete<OperatorBase> *getOperator(const T1 &x1) const {
2280  return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
2281  x1);
2282  }
2283  template <class OperatorBase, class T1, class T2>
2284  Complete<OperatorBase> *getOperator(const T1 &x1, const T2 &x2) const {
2285  return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
2286  x1, x2);
2287  }
2288  template <class OperatorBase, class T1, class T2, class T3>
2289  Complete<OperatorBase> *getOperator(const T1 &x1, const T2 &x2,
2290  const T3 &x3) const {
2291  return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
2292  x1, x2, x3);
2293  }
2294  template <class OperatorBase, class T1, class T2, class T3, class T4>
2295  Complete<OperatorBase> *getOperator(const T1 &x1, const T2 &x2, const T3 &x3,
2296  const T4 &x4) const {
2297  return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
2298  x1, x2, x3, x4);
2299  }
2300  struct InvOp : Operator<0> {
2301  static const int independent_variable = true;
2302  template <class Type>
2303  void forward(ForwardArgs<Type> &args) {}
2304  template <class Type>
2305  void reverse(ReverseArgs<Type> &args) {}
2306  const char *op_name();
2307  };
2308 
2309  struct DepOp : Operator<1> {
2310  static const bool is_linear = true;
2311  static const int dependent_variable = true;
2312  static const bool have_eval = true;
2313  template <class Type>
2314  Type eval(Type x0) {
2315  return x0;
2316  }
2317  template <class Type>
2318  void reverse(ReverseArgs<Type> &args) {
2319  args.dx(0) += args.dy(0);
2320  }
2321  const char *op_name();
2322  };
2323 
2324  struct ConstOp : Operator<0, 1> {
2325  static const bool is_linear = true;
2326  static const bool is_constant = true;
2327  template <class Type>
2328  void forward(ForwardArgs<Type> &args) {}
2329  void forward(ForwardArgs<Replay> &args);
2330  template <class Type>
2331  void reverse(ReverseArgs<Type> &args) {}
2332  const char *op_name();
2333  void forward(ForwardArgs<Writer> &args);
2334  };
2335  struct DataOp : DynamicOutputOperator<0> {
2336  typedef DynamicOutputOperator<0> Base;
2337  static const bool is_linear = true;
2338  DataOp(Index n);
2339  template <class Type>
2340  void forward(ForwardArgs<Type> &args) {}
2341  template <class Type>
2342  void reverse(ReverseArgs<Type> &args) {}
2343  const char *op_name();
2344  void forward(ForwardArgs<Writer> &args);
2345  };
// NOTE(review): the opening lines of this struct (its header and anything
// before this point) are not present in this listing. The members below
// belong to ZeroOp (see the constructor) and use `Base::noutput`, so a
// `Base` typedef is presumably declared in the missing part.
// With this flag, Complete's Replay forward sweep re-adds a copy of the
// operator instead of calling forward().
2356  static const bool add_forward_replay_copy = true;
2357  ZeroOp(Index n);
// Forward sweep: every output is set to Type(0).
2358  template <class Type>
2359  void forward(ForwardArgs<Type> &args) {
2360  for (Index i = 0; i < Base::noutput; i++) args.y(i) = Type(0);
2361  }
// Reverse sweep: nothing to propagate.
2362  template <class Type>
2363  void reverse(ReverseArgs<Type> &args) {}
2364  const char *op_name();
// Source-writer sweep (defined elsewhere).
2365  void forward(ForwardArgs<Writer> &args);
// Call operator on a Replay array of length n (defined elsewhere).
2368  void operator()(Replay *x, Index n);
2369  };
2371  struct NullOp : Operator<0, 0> {
2372  NullOp();
2373  const char *op_name();
2374  template <class T>
2375  void forward(ForwardArgs<T> &args) {}
2376  template <class T>
2377  void reverse(ReverseArgs<T> &args) {}
2378  };
2380  struct NullOp2 : DynamicInputOutputOperator {
2381  NullOp2(Index ninput, Index noutput);
2382  const char *op_name();
2383  template <class T>
2384  void forward(ForwardArgs<T> &args) {}
2385  template <class T>
2386  void reverse(ReverseArgs<T> &args) {}
2387  };
2408  struct RefOp : DynamicOperator<0, 1> {
2409  static const bool dynamic = true;
2410  global *glob;
2411  Index i;
2412  RefOp(global *glob, Index i);
2414  void forward(ForwardArgs<Scalar> &args);
2416  void forward(ForwardArgs<Replay> &args);
2419  template <class Type>
2421  TMBAD_ASSERT2(false,
2422  "Reverse mode updates are forbidden until all references "
2423  "are resolved");
2424  }
2426  void reverse(ReverseArgs<Replay> &args);
2427  const char *op_name();
2428  };
2429 
2430  typedef Operator<1> UnaryOperator;
2431  typedef Operator<2> BinaryOperator;
2432 
2433  OperatorPure *Fuse(OperatorPure *Op1, OperatorPure *Op2);
2434 
2435  static bool fuse;
2436 
2441  void set_fuse(bool flag);
2442 
2445  void add_to_opstack(OperatorPure *pOp);
2447  template <class OperatorBase>
2448  ad_plain add_to_stack(Scalar result = 0) {
2449  ad_plain ans;
2450  ans.index = this->values.size();
2451 
2452  this->values.push_back(result);
2453 
2454  Complete<OperatorBase> *pOp = this->template getOperator<OperatorBase>();
2455  add_to_opstack(pOp);
2456 
2457  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
2458  return ans;
2459  }
2461  template <class OperatorBase>
2462  ad_plain add_to_stack(const ad_plain &x) {
2463  ad_plain ans;
2464  ans.index = this->values.size();
2465 
2466  this->values.push_back(OperatorBase().eval(x.Value()));
2467 
2468  this->inputs.push_back(x.index);
2469 
2470  Complete<OperatorBase> *pOp = this->template getOperator<OperatorBase>();
2471  add_to_opstack(pOp);
2472 
2473  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
2474  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
2475  return ans;
2476  }
2478  template <class OperatorBase>
2479  ad_plain add_to_stack(const ad_plain &x, const ad_plain &y) {
2480  ad_plain ans;
2481  ans.index = this->values.size();
2482 
2483  this->values.push_back(OperatorBase().eval(x.Value(), y.Value()));
2484 
2485  this->inputs.push_back(x.index);
2486  this->inputs.push_back(y.index);
2487 
2488  Complete<OperatorBase> *pOp = this->template getOperator<OperatorBase>();
2489  add_to_opstack(pOp);
2490 
2491  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
2492  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
2493  return ans;
2494  }
2495  template <class OperatorBase>
2496  ad_segment add_to_stack(ad_segment lhs, ad_segment rhs,
2497  ad_segment more = ad_segment()) {
2498  IndexPair ptr((Index)inputs.size(), (Index)values.size());
2499  Complete<OperatorBase> *pOp =
2500  this->template getOperator<OperatorBase>(lhs, rhs);
2501  size_t n = pOp->output_size();
2502  ad_segment ans(values.size(), n);
2503  inputs.push_back(lhs.index());
2504  inputs.push_back(rhs.index());
2505  if (more.size() > 0) inputs.push_back(more.index());
2506  opstack.push_back(pOp);
2507  values.resize(values.size() + n);
2508  ForwardArgs<Scalar> args(inputs, values, this);
2509  args.ptr = ptr;
2510  pOp->forward(args);
2511 
2512  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
2513  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
2514  return ans;
2515  }
2516 
2517  template <class OperatorBase>
2518  ad_segment add_to_stack(Complete<OperatorBase> *pOp, ad_segment lhs,
2519  ad_segment rhs = ad_segment()) {
2520  static_assert(
2521  OperatorBase::dynamic,
2522  "Unlikely that you want to use this method for static operators?");
2523  static_assert(
2524  OperatorBase::ninput == 0 || OperatorBase::implicit_dependencies,
2525  "Operators with pointer inputs should always implement "
2526  "'implicit_dependencies'");
2527 
2528  IndexPair ptr((Index)inputs.size(), (Index)values.size());
2529  size_t n = pOp->output_size();
2530  ad_segment ans(values.size(), n);
2531  TMBAD_ASSERT((Index)(lhs.size() > 0) + (Index)(rhs.size() > 0) ==
2532  pOp->input_size());
2533  if (lhs.size() > 0) inputs.push_back(lhs.index());
2534  if (rhs.size() > 0) inputs.push_back(rhs.index());
2535  opstack.push_back(pOp);
2536  values.resize(values.size() + n);
2537  ForwardArgs<Scalar> args(inputs, values, this);
2538  args.ptr = ptr;
2539  pOp->forward(args);
2540 
2541  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
2542  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
2543  return ans;
2544  }
2547  template <class OperatorBase>
2548  std::vector<ad_plain> add_to_stack(OperatorPure *pOp,
2549  const std::vector<ad_plain> &x) {
2550  IndexPair ptr((Index)inputs.size(), (Index)values.size());
2551  size_t m = pOp->input_size();
2552  size_t n = pOp->output_size();
2553  ad_segment ans(values.size(), n);
2554  for (size_t i = 0; i < m; i++) inputs.push_back(x[i].index);
2555  opstack.push_back(pOp);
2556  values.resize(values.size() + n);
2557  ForwardArgs<Scalar> args(inputs, values, this);
2558  args.ptr = ptr;
2559  pOp->forward(args);
2560 
2561  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
2562  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
2563  std::vector<ad_plain> out(n);
2564  for (size_t i = 0; i < n; i++) out[i].index = ans.index() + i;
2565  return out;
2566  }
2567 
2568  struct ad_plain {
2569  Index index;
2570  static const Index NA = (Index)-1;
2571  bool initialized() const;
2572  bool on_some_tape() const;
2574  void addToTape() const;
2576  global *glob() const;
2580  void override_by(const ad_plain &x) const;
2581 
2586  ad_plain();
2587 
2589  ad_plain(Scalar x);
2591  ad_plain(ad_aug x);
2592 
2594  struct CopyOp : Operator<1> {
2595  static const bool have_eval = true;
2596  template <class Type>
2597  Type eval(Type x0) {
2598  return x0;
2599  }
2600  Replay eval(Replay x0);
2601  template <class Type>
2602  void reverse(ReverseArgs<Type> &args) {
2603  args.dx(0) += args.dy(0);
2604  }
2605  const char *op_name();
2606  };
2614  ad_plain copy() const;
// ValOp: unary operator whose generic eval() is the identity and which
// supplies its own dependencies() (declared here, defined elsewhere).
// NOTE(review): how it differs from CopyOp is decided by members that are
// not visible in this listing (see the gap below).
2625  struct ValOp : Operator<1> {
// own dependencies() is provided, so no default is added
2626  static const bool have_dependencies = true;
// forward() is generated from eval()
2627  static const bool have_eval = true;
2629  template <class Type>
2630  Type eval(Type x0) {
2631  return x0;
2632  }
// Replay overload (defined elsewhere)
2633  Replay eval(Replay x0);
// NOTE(review): the member template introduced by the next line
// (presumably the reverse sweep) is missing from this listing.
2635  template <class Type>
2643  void dependencies(Args<> &args, Dependencies &dep) const;
2644  const char *op_name();
2645  };
2649  ad_plain copy0() const;
2650 
2651  template <bool left_var, bool right_var>
2652  struct AddOp_ : BinaryOperator {
2653  static const bool is_linear = true;
2654  static const bool have_eval = true;
2655  template <class Type>
2656  Type eval(Type x0, Type x1) {
2657  return x0 + x1;
2658  }
2659  template <class Type>
2660  void reverse(ReverseArgs<Type> &args) {
2661  if (left_var) args.dx(0) += args.dy(0);
2662  if (right_var) args.dx(1) += args.dy(0);
2663  }
2664  const char *op_name() { return "AddOp"; }
2665  OperatorPure *other_fuse(OperatorPure *self, OperatorPure *other) {
2666  if (other == get_glob()->getOperator<MulOp>()) {
2667  return get_glob()->getOperator<Fused<AddOp_, MulOp> >();
2668  }
2669  return NULL;
2670  }
2671  };
2672  typedef AddOp_<true, true> AddOp;
2673  ad_plain operator+(const ad_plain &other) const;
2674 
2675  template <bool left_var, bool right_var>
2676  struct SubOp_ : BinaryOperator {
2677  static const bool is_linear = true;
2678  static const bool have_eval = true;
2679  template <class Type>
2680  Type eval(Type x0, Type x1) {
2681  return x0 - x1;
2682  }
2683  template <class Type>
2684  void reverse(ReverseArgs<Type> &args) {
2685  if (left_var) args.dx(0) += args.dy(0);
2686  if (right_var) args.dx(1) -= args.dy(0);
2687  }
2688  const char *op_name() { return "SubOp"; }
2689  };
2690  typedef SubOp_<true, true> SubOp;
2691  ad_plain operator-(const ad_plain &other) const;
2692 
2693  template <bool left_var, bool right_var>
2694  struct MulOp_ : BinaryOperator {
2695  static const bool have_eval = true;
2696  static const bool is_linear = !left_var || !right_var;
2697  template <class Type>
2698  Type eval(Type x0, Type x1) {
2699  return x0 * x1;
2700  }
2701  template <class Type>
2702  void reverse(ReverseArgs<Type> &args) {
2703  if (left_var) args.dx(0) += args.x(1) * args.dy(0);
2704  if (right_var) args.dx(1) += args.x(0) * args.dy(0);
2705  }
2706  const char *op_name() { return "MulOp"; }
2707  };
2708  typedef MulOp_<true, true> MulOp;
2709  ad_plain operator*(const ad_plain &other) const;
2710  ad_plain operator*(const Scalar &other) const;
2711 
2712  template <bool left_var, bool right_var>
2713  struct DivOp_ : BinaryOperator {
2714  static const bool have_eval = true;
2715  template <class Type>
2716  Type eval(Type x0, Type x1) {
2717  return x0 / x1;
2718  }
2719  template <class Type>
2720  void reverse(ReverseArgs<Type> &args) {
2721  Type tmp0 = args.dy(0) / args.x(1);
2722  if (left_var) args.dx(0) += tmp0;
2723  if (right_var) args.dx(1) -= args.y(0) * tmp0;
2724  }
2725  const char *op_name() { return "DivOp"; }
2726  };
2727  typedef DivOp_<true, true> DivOp;
2728  ad_plain operator/(const ad_plain &other) const;
2729 
2730  struct NegOp : UnaryOperator {
2731  static const bool is_linear = true;
2732  static const bool have_eval = true;
2733  template <class Type>
2734  Type eval(Type x0) {
2735  return -x0;
2736  }
2737  template <class Type>
2738  void reverse(ReverseArgs<Type> &args) {
2739  args.dx(0) -= args.dy(0);
2740  }
2741  const char *op_name();
2742  };
2743  ad_plain operator-() const;
2744 
2745  ad_plain &operator+=(const ad_plain &other);
2746  ad_plain &operator-=(const ad_plain &other);
2747  ad_plain &operator*=(const ad_plain &other);
2748  ad_plain &operator/=(const ad_plain &other);
2749 
2750  void Dependent();
2751 
2752  void Independent();
2753  Scalar &Value();
2754  Scalar Value() const;
2755  Scalar Value(global *glob) const;
2756  Scalar &Deriv();
2757  };
2765  bool in_use;
2769  void ad_start();
2771  void ad_stop();
2772  void Independent(std::vector<ad_plain> &x);
2780  struct ad_segment {
2781  ad_plain x;
2782  size_t n;
2783  size_t c;
2785  ad_segment();
2787  ad_segment(ad_plain x, size_t n);
2789  ad_segment(ad_aug x);
2791  ad_segment(Scalar x);
2793  ad_segment(Index idx, size_t n);
2795  ad_segment(ad_plain x, size_t r, size_t c);
2798  ad_segment(Replay *x, size_t n, bool zero_check = false);
2799  bool identicalZero();
2800  bool all_on_active_tape(Replay *x, size_t n);
2801  bool is_contiguous(Replay *x, size_t n);
2802  bool all_zero(Replay *x, size_t n);
2803  bool all_constant(Replay *x, size_t n);
2804  size_t size() const;
2805  size_t rows() const;
2806  size_t cols() const;
2807 
2808  ad_plain operator[](size_t i) const;
2809  ad_plain offset() const;
2810  Index index() const;
2811  };
2831  struct ad_aug {
2834  mutable ad_plain taped_value;
2838  TMBAD_UNION_OR_STRUCT {
2839  Scalar value;
2840  mutable global *glob;
2841  }
2842  data;
2844  bool on_some_tape() const;
2846  bool on_active_tape() const;
2848  bool ontape() const;
2852  bool constant() const;
2853  Index index() const;
2859  global *glob() const;
2861  Scalar Value() const;
2865  ad_aug();
2869  ad_aug(Scalar x);
2871  ad_aug(ad_plain x);
2876  void addToTape() const;
2880  void override_by(const ad_plain &x) const;
2882  bool in_context_stack(global *glob) const;
2885  ad_aug copy() const;
2887  ad_aug copy0() const;
2890  bool identicalZero() const;
2893  bool identicalOne() const;
2897  bool bothConstant(const ad_aug &other) const;
2901  bool identical(const ad_aug &other) const;
2906  ad_aug operator+(const ad_aug &other) const;
2912  ad_aug operator-(const ad_aug &other) const;
2914  ad_aug operator-() const;
2921  ad_aug operator*(const ad_aug &other) const;
2926  ad_aug operator/(const ad_aug &other) const;
2929  ad_aug &operator+=(const ad_aug &other);
2932  ad_aug &operator-=(const ad_aug &other);
2935  ad_aug &operator*=(const ad_aug &other);
2938  ad_aug &operator/=(const ad_aug &other);
2940  void Dependent();
2942  void Independent();
2943  Scalar &Value();
2944  Scalar &Deriv();
2945  };
2946  void Independent(std::vector<ad_aug> &x);
2947 };
2948 
/** Stream a pair as "(first, second)" and return the stream. */
template <class S, class T>
std::ostream &operator<<(std::ostream &os, const std::pair<S, T> &x) {
  os << "(";
  os << x.first;
  os << ", ";
  os << x.second;
  os << ")";
  return os;
}
2954 
2955 std::ostream &operator<<(std::ostream &os, const global::ad_plain &x);
2956 std::ostream &operator<<(std::ostream &os, const global::ad_aug &x);
2957 
2968 template <class T>
2969 struct adaptive : T {
2970  INHERIT_CTOR(adaptive, T)
2971  bool operator==(const T &other) const {
2972  return this->Value() == other.Value();
2973  }
2974  bool operator!=(const T &other) const {
2975  return this->Value() != other.Value();
2976  }
2977  bool operator>=(const T &other) const {
2978  return this->Value() >= other.Value();
2979  }
2980  bool operator<=(const T &other) const {
2981  return this->Value() <= other.Value();
2982  }
2983  bool operator<(const T &other) const { return this->Value() < other.Value(); }
2984  bool operator>(const T &other) const { return this->Value() > other.Value(); }
2985 
2986  adaptive operator+(const T &other) const {
2987  return adaptive(T(*this) + other);
2988  }
2989  adaptive operator-(const T &other) const {
2990  return adaptive(T(*this) - other);
2991  }
2992  adaptive operator*(const T &other) const {
2993  return adaptive(T(*this) * other);
2994  }
2995  adaptive operator/(const T &other) const {
2996  return adaptive(T(*this) / other);
2997  }
2998 
2999  adaptive operator-() const { return adaptive(-(T(*this))); }
3000 };
3001 
3002 typedef global::ad_plain ad_plain;
3003 typedef global::ad_aug ad_aug;
3004 typedef global::Replay Replay;
3005 typedef adaptive<ad_aug> ad_adapt;
3014 struct ad_plain_index : ad_plain {
3015  ad_plain_index(const Index &i);
3016  ad_plain_index(const ad_plain &x);
3017 };
3018 struct ad_aug_index : ad_aug {
3019  ad_aug_index(const Index &i);
3020  ad_aug_index(const ad_aug &x);
3021  ad_aug_index(const ad_plain &x);
3022 };
3023 
/** Call `Independent()` on every element of `x`, front to back. */
template <class T>
void Independent(std::vector<T> &x) {
  typedef typename std::vector<T>::iterator Iter;
  for (Iter it = x.begin(); it != x.end(); ++it) {
    it->Independent();
  }
}
/** Call `Dependent()` on every element of `x`, front to back. */
template <class T>
void Dependent(std::vector<T> &x) {
  typedef typename std::vector<T>::iterator Iter;
  for (Iter it = x.begin(); it != x.end(); ++it) {
    it->Dependent();
  }
}
3032 template <class T>
3033 Scalar Value(T x) {
3034  return x.Value();
3035 }
3036 Scalar Value(Scalar x);
3037 
3044 template <class V>
3045 bool isContiguous(V &x) {
3046  bool ok = true;
3047  Index j_previous;
3048  for (size_t i = 0; i < (size_t)x.size(); i++) {
3049  if (!x[i].on_some_tape()) {
3050  ok = false;
3051  break;
3052  }
3053  Index j = ad_plain(x[i]).index;
3054  if (i > 0) {
3055  if (j != j_previous + 1) {
3056  ok = false;
3057  break;
3058  }
3059  }
3060  j_previous = j;
3061  }
3062  return ok;
3063 }
/** Build a container of the same size as `x` whose i-th element is
    `x[i].copy()`, filled in increasing index order. */
template <class V>
V getContiguous(const V &x) {
  V copies(x.size());
  const size_t count = (size_t)x.size();
  for (size_t k = 0; k != count; ++k) {
    copies[k] = x[k].copy();
  }
  return copies;
}
/** Replace `x` by `getContiguous(x)` unless `isContiguous(x)` already
    holds. */
template <class V>
void forceContiguous(V &x) {
  if (isContiguous(x)) return;
  x = getContiguous(x);
}
3086 ad_aug operator+(const double &x, const ad_aug &y);
3087 ad_aug operator-(const double &x, const ad_aug &y);
3088 ad_aug operator*(const double &x, const ad_aug &y);
3089 ad_aug operator/(const double &x, const ad_aug &y);
3090 
3091 bool operator<(const double &x, const ad_adapt &y);
3092 bool operator<=(const double &x, const ad_adapt &y);
3093 bool operator>(const double &x, const ad_adapt &y);
3094 bool operator>=(const double &x, const ad_adapt &y);
3095 bool operator==(const double &x, const ad_adapt &y);
3096 bool operator!=(const double &x, const ad_adapt &y);
3097 using ::round;
3098 using ::trunc;
3099 using std::ceil;
3100 using std::floor;
3101 Writer floor(const Writer &x);
3102 struct FloorOp : global::UnaryOperator {
3103  static const bool have_eval = true;
3104  template <class Type>
3105  Type eval(Type x) {
3106  return floor(x);
3107  }
3108  template <class Type>
3109  void reverse(ReverseArgs<Type> &args) {}
3110  const char *op_name();
3111 };
3112 ad_plain floor(const ad_plain &x);
3113 ad_aug floor(const ad_aug &x);
3114 Writer ceil(const Writer &x);
3115 struct CeilOp : global::UnaryOperator {
3116  static const bool have_eval = true;
3117  template <class Type>
3118  Type eval(Type x) {
3119  return ceil(x);
3120  }
3121  template <class Type>
3122  void reverse(ReverseArgs<Type> &args) {}
3123  const char *op_name();
3124 };
3125 ad_plain ceil(const ad_plain &x);
3126 ad_aug ceil(const ad_aug &x);
3127 Writer trunc(const Writer &x);
3128 struct TruncOp : global::UnaryOperator {
3129  static const bool have_eval = true;
3130  template <class Type>
3131  Type eval(Type x) {
3132  return trunc(x);
3133  }
3134  template <class Type>
3135  void reverse(ReverseArgs<Type> &args) {}
3136  const char *op_name();
3137 };
3138 ad_plain trunc(const ad_plain &x);
3139 ad_aug trunc(const ad_aug &x);
3140 Writer round(const Writer &x);
3141 struct RoundOp : global::UnaryOperator {
3142  static const bool have_eval = true;
3143  template <class Type>
3144  Type eval(Type x) {
3145  return round(x);
3146  }
3147  template <class Type>
3148  void reverse(ReverseArgs<Type> &args) {}
3149  const char *op_name();
3150 };
3151 ad_plain round(const ad_plain &x);
3152 ad_aug round(const ad_aug &x);
3153 
3154 double sign(const double &x);
3155 Writer sign(const Writer &x);
3156 struct SignOp : global::UnaryOperator {
3157  static const bool have_eval = true;
3158  template <class Type>
3159  Type eval(Type x) {
3160  return sign(x);
3161  }
3162  template <class Type>
3163  void reverse(ReverseArgs<Type> &args) {}
3164  const char *op_name();
3165 };
3166 ad_plain sign(const ad_plain &x);
3167 ad_aug sign(const ad_aug &x);
3168 
3169 double ge0(const double &x);
3170 double lt0(const double &x);
3171 Writer ge0(const Writer &x);
3172 struct Ge0Op : global::UnaryOperator {
3173  static const bool have_eval = true;
3174  template <class Type>
3175  Type eval(Type x) {
3176  return ge0(x);
3177  }
3178  template <class Type>
3179  void reverse(ReverseArgs<Type> &args) {}
3180  const char *op_name();
3181 };
3182 ad_plain ge0(const ad_plain &x);
3183 ad_aug ge0(const ad_aug &x);
3184 Writer lt0(const Writer &x);
3185 struct Lt0Op : global::UnaryOperator {
3186  static const bool have_eval = true;
3187  template <class Type>
3188  Type eval(Type x) {
3189  return lt0(x);
3190  }
3191  template <class Type>
3192  void reverse(ReverseArgs<Type> &args) {}
3193  const char *op_name();
3194 };
3195 ad_plain lt0(const ad_plain &x);
3196 ad_aug lt0(const ad_aug &x);
3197 using ::expm1;
3198 using ::fabs;
3199 using ::log1p;
3200 using std::acos;
3201 using std::acosh;
3202 using std::asin;
3203 using std::asinh;
3204 using std::atan;
3205 using std::atanh;
3206 using std::cos;
3207 using std::cosh;
3208 using std::exp;
3209 using std::log;
3210 using std::sin;
3211 using std::sinh;
3212 using std::sqrt;
3213 using std::tan;
3214 using std::tanh;
3215 
3216 Writer fabs(const Writer &x);
3217 struct AbsOp : global::UnaryOperator {
3218  static const bool have_eval = true;
3219  template <class Type>
3220  Type eval(Type x) {
3221  return fabs(x);
3222  }
3223  template <class Type>
3224  void reverse(ReverseArgs<Type> &args) {
3225  args.dx(0) += args.dy(0) * sign(args.x(0));
3226  }
3227  void reverse(ReverseArgs<Scalar> &args);
3228  const char *op_name();
3229 };
3230 ad_plain fabs(const ad_plain &x);
3231 ad_aug fabs(const ad_aug &x);
3232 ad_adapt fabs(const ad_adapt &x);
3233 Writer cos(const Writer &x);
3234 ad_aug cos(const ad_aug &x);
3235 Writer sin(const Writer &x);
3236 struct SinOp : global::UnaryOperator {
3237  static const bool have_eval = true;
3238  template <class Type>
3239  Type eval(Type x) {
3240  return sin(x);
3241  }
3242  template <class Type>
3243  void reverse(ReverseArgs<Type> &args) {
3244  args.dx(0) += args.dy(0) * cos(args.x(0));
3245  }
3246  void reverse(ReverseArgs<Scalar> &args);
3247  const char *op_name();
3248 };
3249 ad_plain sin(const ad_plain &x);
3250 ad_aug sin(const ad_aug &x);
3251 ad_adapt sin(const ad_adapt &x);
3252 Writer cos(const Writer &x);
3253 struct CosOp : global::UnaryOperator {
3254  static const bool have_eval = true;
3255  template <class Type>
3256  Type eval(Type x) {
3257  return cos(x);
3258  }
3259  template <class Type>
3260  void reverse(ReverseArgs<Type> &args) {
3261  args.dx(0) += args.dy(0) * -sin(args.x(0));
3262  }
3263  void reverse(ReverseArgs<Scalar> &args);
3264  const char *op_name();
3265 };
3266 ad_plain cos(const ad_plain &x);
3267 ad_aug cos(const ad_aug &x);
3268 ad_adapt cos(const ad_adapt &x);
3269 Writer exp(const Writer &x);
3270 struct ExpOp : global::UnaryOperator {
3271  static const bool have_eval = true;
3272  template <class Type>
3273  Type eval(Type x) {
3274  return exp(x);
3275  }
3276  template <class Type>
3277  void reverse(ReverseArgs<Type> &args) {
3278  args.dx(0) += args.dy(0) * args.y(0);
3279  }
3280  void reverse(ReverseArgs<Scalar> &args);
3281  const char *op_name();
3282 };
3283 ad_plain exp(const ad_plain &x);
3284 ad_aug exp(const ad_aug &x);
3285 ad_adapt exp(const ad_adapt &x);
3286 Writer log(const Writer &x);
3287 struct LogOp : global::UnaryOperator {
3288  static const bool have_eval = true;
3289  template <class Type>
3290  Type eval(Type x) {
3291  return log(x);
3292  }
3293  template <class Type>
3294  void reverse(ReverseArgs<Type> &args) {
3295  args.dx(0) += args.dy(0) * Type(1.) / args.x(0);
3296  }
3297  void reverse(ReverseArgs<Scalar> &args);
3298  const char *op_name();
3299 };
3300 ad_plain log(const ad_plain &x);
3301 ad_aug log(const ad_aug &x);
3302 ad_adapt log(const ad_adapt &x);
3303 Writer sqrt(const Writer &x);
3304 struct SqrtOp : global::UnaryOperator {
3305  static const bool have_eval = true;
3306  template <class Type>
3307  Type eval(Type x) {
3308  return sqrt(x);
3309  }
3310  template <class Type>
3311  void reverse(ReverseArgs<Type> &args) {
3312  args.dx(0) += args.dy(0) * Type(0.5) / args.y(0);
3313  }
3314  void reverse(ReverseArgs<Scalar> &args);
3315  const char *op_name();
3316 };
3317 ad_plain sqrt(const ad_plain &x);
3318 ad_aug sqrt(const ad_aug &x);
3319 ad_adapt sqrt(const ad_adapt &x);
3320 Writer tan(const Writer &x);
3321 struct TanOp : global::UnaryOperator {
3322  static const bool have_eval = true;
3323  template <class Type>
3324  Type eval(Type x) {
3325  return tan(x);
3326  }
3327  template <class Type>
3328  void reverse(ReverseArgs<Type> &args) {
3329  args.dx(0) += args.dy(0) * Type(1.) / (cos(args.x(0)) * cos(args.x(0)));
3330  }
3331  void reverse(ReverseArgs<Scalar> &args);
3332  const char *op_name();
3333 };
3334 ad_plain tan(const ad_plain &x);
3335 ad_aug tan(const ad_aug &x);
3336 ad_adapt tan(const ad_adapt &x);
3337 Writer cosh(const Writer &x);
3338 ad_aug cosh(const ad_aug &x);
3339 Writer sinh(const Writer &x);
3340 struct SinhOp : global::UnaryOperator {
3341  static const bool have_eval = true;
3342  template <class Type>
3343  Type eval(Type x) {
3344  return sinh(x);
3345  }
3346  template <class Type>
3347  void reverse(ReverseArgs<Type> &args) {
3348  args.dx(0) += args.dy(0) * cosh(args.x(0));
3349  }
3350  void reverse(ReverseArgs<Scalar> &args);
3351  const char *op_name();
3352 };
3353 ad_plain sinh(const ad_plain &x);
3354 ad_aug sinh(const ad_aug &x);
3355 ad_adapt sinh(const ad_adapt &x);
3356 Writer cosh(const Writer &x);
3357 struct CoshOp : global::UnaryOperator {
3358  static const bool have_eval = true;
3359  template <class Type>
3360  Type eval(Type x) {
3361  return cosh(x);
3362  }
3363  template <class Type>
3364  void reverse(ReverseArgs<Type> &args) {
3365  args.dx(0) += args.dy(0) * sinh(args.x(0));
3366  }
3367  void reverse(ReverseArgs<Scalar> &args);
3368  const char *op_name();
3369 };
3370 ad_plain cosh(const ad_plain &x);
3371 ad_aug cosh(const ad_aug &x);
3372 ad_adapt cosh(const ad_adapt &x);
3373 Writer tanh(const Writer &x);
3374 struct TanhOp : global::UnaryOperator {
3375  static const bool have_eval = true;
3376  template <class Type>
3377  Type eval(Type x) {
3378  return tanh(x);
3379  }
3380  template <class Type>
3381  void reverse(ReverseArgs<Type> &args) {
3382  args.dx(0) += args.dy(0) * Type(1.) / (cosh(args.x(0)) * cosh(args.x(0)));
3383  }
3384  void reverse(ReverseArgs<Scalar> &args);
3385  const char *op_name();
3386 };
3387 ad_plain tanh(const ad_plain &x);
3388 ad_aug tanh(const ad_aug &x);
3389 ad_adapt tanh(const ad_adapt &x);
3390 Writer expm1(const Writer &x);
3391 struct Expm1 : global::UnaryOperator {
3392  static const bool have_eval = true;
3393  template <class Type>
3394  Type eval(Type x) {
3395  return expm1(x);
3396  }
3397  template <class Type>
3398  void reverse(ReverseArgs<Type> &args) {
3399  args.dx(0) += args.dy(0) * args.y(0) + Type(1.);
3400  }
3401  void reverse(ReverseArgs<Scalar> &args);
3402  const char *op_name();
3403 };
3404 ad_plain expm1(const ad_plain &x);
3405 ad_aug expm1(const ad_aug &x);
3406 ad_adapt expm1(const ad_adapt &x);
3407 Writer log1p(const Writer &x);
3408 struct Log1p : global::UnaryOperator {
3409  static const bool have_eval = true;
3410  template <class Type>
3411  Type eval(Type x) {
3412  return log1p(x);
3413  }
3414  template <class Type>
3415  void reverse(ReverseArgs<Type> &args) {
3416  args.dx(0) += args.dy(0) * Type(1.) / (args.x(0) + Type(1.));
3417  }
3418  void reverse(ReverseArgs<Scalar> &args);
3419  const char *op_name();
3420 };
3421 ad_plain log1p(const ad_plain &x);
3422 ad_aug log1p(const ad_aug &x);
3423 ad_adapt log1p(const ad_adapt &x);
3424 Writer asin(const Writer &x);
3425 struct AsinOp : global::UnaryOperator {
3426  static const bool have_eval = true;
3427  template <class Type>
3428  Type eval(Type x) {
3429  return asin(x);
3430  }
3431  template <class Type>
3432  void reverse(ReverseArgs<Type> &args) {
3433  args.dx(0) +=
3434  args.dy(0) * Type(1.) / sqrt(Type(1.) - args.x(0) * args.x(0));
3435  }
3436  void reverse(ReverseArgs<Scalar> &args);
3437  const char *op_name();
3438 };
3439 ad_plain asin(const ad_plain &x);
3440 ad_aug asin(const ad_aug &x);
3441 ad_adapt asin(const ad_adapt &x);
3442 Writer acos(const Writer &x);
3443 struct AcosOp : global::UnaryOperator {
3444  static const bool have_eval = true;
3445  template <class Type>
3446  Type eval(Type x) {
3447  return acos(x);
3448  }
3449  template <class Type>
3450  void reverse(ReverseArgs<Type> &args) {
3451  args.dx(0) +=
3452  args.dy(0) * Type(-1.) / sqrt(Type(1.) - args.x(0) * args.x(0));
3453  }
3454  void reverse(ReverseArgs<Scalar> &args);
3455  const char *op_name();
3456 };
3457 ad_plain acos(const ad_plain &x);
3458 ad_aug acos(const ad_aug &x);
3459 ad_adapt acos(const ad_adapt &x);
3460 Writer atan(const Writer &x);
3461 struct AtanOp : global::UnaryOperator {
3462  static const bool have_eval = true;
3463  template <class Type>
3464  Type eval(Type x) {
3465  return atan(x);
3466  }
3467  template <class Type>
3468  void reverse(ReverseArgs<Type> &args) {
3469  args.dx(0) += args.dy(0) * Type(1.) / (Type(1.) + args.x(0) * args.x(0));
3470  }
3471  void reverse(ReverseArgs<Scalar> &args);
3472  const char *op_name();
3473 };
3474 ad_plain atan(const ad_plain &x);
3475 ad_aug atan(const ad_aug &x);
3476 ad_adapt atan(const ad_adapt &x);
3477 Writer asinh(const Writer &x);
3478 struct AsinhOp : global::UnaryOperator {
3479  static const bool have_eval = true;
3480  template <class Type>
3481  Type eval(Type x) {
3482  return asinh(x);
3483  }
3484  template <class Type>
3485  void reverse(ReverseArgs<Type> &args) {
3486  args.dx(0) +=
3487  args.dy(0) * Type(1.) / sqrt(args.x(0) * args.x(0) + Type(1.));
3488  }
3489  void reverse(ReverseArgs<Scalar> &args);
3490  const char *op_name();
3491 };
3492 ad_plain asinh(const ad_plain &x);
3493 ad_aug asinh(const ad_aug &x);
3494 ad_adapt asinh(const ad_adapt &x);
3495 Writer acosh(const Writer &x);
3496 struct AcoshOp : global::UnaryOperator {
3497  static const bool have_eval = true;
3498  template <class Type>
3499  Type eval(Type x) {
3500  return acosh(x);
3501  }
3502  template <class Type>
3503  void reverse(ReverseArgs<Type> &args) {
3504  args.dx(0) +=
3505  args.dy(0) * Type(1.) / sqrt(args.x(0) * args.x(0) - Type(1.));
3506  }
3507  void reverse(ReverseArgs<Scalar> &args);
3508  const char *op_name();
3509 };
3510 ad_plain acosh(const ad_plain &x);
3511 ad_aug acosh(const ad_aug &x);
3512 ad_adapt acosh(const ad_adapt &x);
3513 Writer atanh(const Writer &x);
3514 struct AtanhOp : global::UnaryOperator {
3515  static const bool have_eval = true;
3516  template <class Type>
3517  Type eval(Type x) {
3518  return atanh(x);
3519  }
3520  template <class Type>
3521  void reverse(ReverseArgs<Type> &args) {
3522  args.dx(0) += args.dy(0) * Type(1.) / (Type(1) - args.x(0) * args.x(0));
3523  }
3524  void reverse(ReverseArgs<Scalar> &args);
3525  const char *op_name();
3526 };
3527 ad_plain atanh(const ad_plain &x);
3528 ad_aug atanh(const ad_aug &x);
3529 ad_adapt atanh(const ad_adapt &x);
3530 
/** Generic absolute value: forwards to whichever `fabs` overload matches
    `T` and returns the result as `T`. */
template <class T>
T abs(const T &x) {
  const T magnitude = fabs(x);
  return magnitude;
}
3535 using std::pow;
3536 Writer pow(const Writer &x1, const Writer &x2);
3537 struct PowOp : global::BinaryOperator {
3538  static const bool have_eval = true;
3539  template <class Type>
3540  Type eval(Type x1, Type x2) {
3541  return pow(x1, x2);
3542  }
3543  template <class Type>
3544  void reverse(ReverseArgs<Type> &args) {
3545  args.dx(0) += args.dy(0) * args.x(1) * pow(args.x(0), args.x(1) - Type(1.));
3546  args.dx(1) += args.dy(0) * args.y(0) * log(args.x(0));
3547  }
3548  const char *op_name();
3549 };
3550 ad_plain pow(const ad_plain &x1, const ad_plain &x2);
3551 ad_aug pow(const ad_aug &x1, const ad_aug &x2);
3552 ad_adapt pow(const ad_adapt &x1, const ad_adapt &x2);
3553 using std::atan2;
3554 Writer atan2(const Writer &x1, const Writer &x2);
3555 struct Atan2 : global::BinaryOperator {
3556  static const bool have_eval = true;
3557  template <class Type>
3558  Type eval(Type x1, Type x2) {
3559  return atan2(x1, x2);
3560  }
3561  template <class Type>
3562  void reverse(ReverseArgs<Type> &args) {
3563  args.dx(0) += args.dy(0) * args.x(1) /
3564  (args.x(0) * args.x(0) + args.x(1) * args.x(1));
3565  args.dx(1) += args.dy(0) * -args.x(0) /
3566  (args.x(0) * args.x(0) + args.x(1) * args.x(1));
3567  }
3568  const char *op_name();
3569 };
3570 ad_plain atan2(const ad_plain &x1, const ad_plain &x2);
3571 ad_aug atan2(const ad_aug &x1, const ad_aug &x2);
3572 ad_adapt atan2(const ad_adapt &x1, const ad_adapt &x2);
3573 using std::max;
3574 Writer max(const Writer &x1, const Writer &x2);
3575 struct MaxOp : global::BinaryOperator {
3576  static const bool have_eval = true;
3577  template <class Type>
3578  Type eval(Type x1, Type x2) {
3579  return max(x1, x2);
3580  }
3581  template <class Type>
3582  void reverse(ReverseArgs<Type> &args) {
3583  args.dx(0) += args.dy(0) * ge0(args.x(0) - args.x(1));
3584  args.dx(1) += args.dy(0) * lt0(args.x(0) - args.x(1));
3585  }
3586  const char *op_name();
3587 };
3588 ad_plain max(const ad_plain &x1, const ad_plain &x2);
3589 ad_aug max(const ad_aug &x1, const ad_aug &x2);
3590 ad_adapt max(const ad_adapt &x1, const ad_adapt &x2);
3591 
3592 using std::min;
3593 Writer min(const Writer &x1, const Writer &x2);
3594 struct MinOp : global::BinaryOperator {
3595  static const bool have_eval = true;
3596  template <class Type>
3597  Type eval(Type x1, Type x2) {
3598  return min(x1, x2);
3599  }
3600  template <class Type>
3601  void reverse(ReverseArgs<Type> &args) {
3602  args.dx(0) += args.dy(0) * ge0(args.x(1) - args.x(0));
3603  args.dx(1) += args.dy(0) * lt0(args.x(1) - args.x(0));
3604  }
3605  const char *op_name();
3606 };
3607 ad_plain min(const ad_plain &x1, const ad_plain &x2);
3608 ad_aug min(const ad_aug &x1, const ad_aug &x2);
3609 ad_adapt min(const ad_adapt &x1, const ad_adapt &x2);
3610 Replay CondExpEq(const Replay &x0, const Replay &x1, const Replay &x2,
3611  const Replay &x3);
3612 struct CondExpEqOp : global::Operator<4, 1> {
3613  void forward(ForwardArgs<Scalar> &args);
3614  void reverse(ReverseArgs<Scalar> &args);
3615  void forward(ForwardArgs<Replay> &args);
3616  void reverse(ReverseArgs<Replay> &args);
3617  void forward(ForwardArgs<Writer> &args);
3618  void reverse(ReverseArgs<Writer> &args);
3619  template <class Type>
3620  void forward(ForwardArgs<Type> &args) {
3621  TMBAD_ASSERT(false);
3622  }
3623  template <class Type>
3624  void reverse(ReverseArgs<Type> &args) {
3625  TMBAD_ASSERT(false);
3626  }
3627  const char *op_name();
3628 };
3629 Scalar CondExpEq(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3630  const Scalar &x3);
3631 ad_plain CondExpEq(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3632  const ad_plain &x3);
3633 ad_aug CondExpEq(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3634  const ad_aug &x3);
3635 Replay CondExpNe(const Replay &x0, const Replay &x1, const Replay &x2,
3636  const Replay &x3);
3637 struct CondExpNeOp : global::Operator<4, 1> {
3638  void forward(ForwardArgs<Scalar> &args);
3639  void reverse(ReverseArgs<Scalar> &args);
3640  void forward(ForwardArgs<Replay> &args);
3641  void reverse(ReverseArgs<Replay> &args);
3642  void forward(ForwardArgs<Writer> &args);
3643  void reverse(ReverseArgs<Writer> &args);
3644  template <class Type>
3645  void forward(ForwardArgs<Type> &args) {
3646  TMBAD_ASSERT(false);
3647  }
3648  template <class Type>
3649  void reverse(ReverseArgs<Type> &args) {
3650  TMBAD_ASSERT(false);
3651  }
3652  const char *op_name();
3653 };
3654 Scalar CondExpNe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3655  const Scalar &x3);
3656 ad_plain CondExpNe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3657  const ad_plain &x3);
3658 ad_aug CondExpNe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3659  const ad_aug &x3);
3660 Replay CondExpGt(const Replay &x0, const Replay &x1, const Replay &x2,
3661  const Replay &x3);
3662 struct CondExpGtOp : global::Operator<4, 1> {
3663  void forward(ForwardArgs<Scalar> &args);
3664  void reverse(ReverseArgs<Scalar> &args);
3665  void forward(ForwardArgs<Replay> &args);
3666  void reverse(ReverseArgs<Replay> &args);
3667  void forward(ForwardArgs<Writer> &args);
3668  void reverse(ReverseArgs<Writer> &args);
3669  template <class Type>
3670  void forward(ForwardArgs<Type> &args) {
3671  TMBAD_ASSERT(false);
3672  }
3673  template <class Type>
3674  void reverse(ReverseArgs<Type> &args) {
3675  TMBAD_ASSERT(false);
3676  }
3677  const char *op_name();
3678 };
3679 Scalar CondExpGt(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3680  const Scalar &x3);
3681 ad_plain CondExpGt(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3682  const ad_plain &x3);
3683 ad_aug CondExpGt(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3684  const ad_aug &x3);
3685 Replay CondExpLt(const Replay &x0, const Replay &x1, const Replay &x2,
3686  const Replay &x3);
3687 struct CondExpLtOp : global::Operator<4, 1> {
3688  void forward(ForwardArgs<Scalar> &args);
3689  void reverse(ReverseArgs<Scalar> &args);
3690  void forward(ForwardArgs<Replay> &args);
3691  void reverse(ReverseArgs<Replay> &args);
3692  void forward(ForwardArgs<Writer> &args);
3693  void reverse(ReverseArgs<Writer> &args);
3694  template <class Type>
3695  void forward(ForwardArgs<Type> &args) {
3696  TMBAD_ASSERT(false);
3697  }
3698  template <class Type>
3699  void reverse(ReverseArgs<Type> &args) {
3700  TMBAD_ASSERT(false);
3701  }
3702  const char *op_name();
3703 };
3704 Scalar CondExpLt(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3705  const Scalar &x3);
3706 ad_plain CondExpLt(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3707  const ad_plain &x3);
3708 ad_aug CondExpLt(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3709  const ad_aug &x3);
3710 Replay CondExpGe(const Replay &x0, const Replay &x1, const Replay &x2,
3711  const Replay &x3);
3712 struct CondExpGeOp : global::Operator<4, 1> {
3713  void forward(ForwardArgs<Scalar> &args);
3714  void reverse(ReverseArgs<Scalar> &args);
3715  void forward(ForwardArgs<Replay> &args);
3716  void reverse(ReverseArgs<Replay> &args);
3717  void forward(ForwardArgs<Writer> &args);
3718  void reverse(ReverseArgs<Writer> &args);
3719  template <class Type>
3720  void forward(ForwardArgs<Type> &args) {
3721  TMBAD_ASSERT(false);
3722  }
3723  template <class Type>
3724  void reverse(ReverseArgs<Type> &args) {
3725  TMBAD_ASSERT(false);
3726  }
3727  const char *op_name();
3728 };
3729 Scalar CondExpGe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3730  const Scalar &x3);
3731 ad_plain CondExpGe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3732  const ad_plain &x3);
3733 ad_aug CondExpGe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3734  const ad_aug &x3);
3735 Replay CondExpLe(const Replay &x0, const Replay &x1, const Replay &x2,
3736  const Replay &x3);
3737 struct CondExpLeOp : global::Operator<4, 1> {
3738  void forward(ForwardArgs<Scalar> &args);
3739  void reverse(ReverseArgs<Scalar> &args);
3740  void forward(ForwardArgs<Replay> &args);
3741  void reverse(ReverseArgs<Replay> &args);
3742  void forward(ForwardArgs<Writer> &args);
3743  void reverse(ReverseArgs<Writer> &args);
3744  template <class Type>
3745  void forward(ForwardArgs<Type> &args) {
3746  TMBAD_ASSERT(false);
3747  }
3748  template <class Type>
3749  void reverse(ReverseArgs<Type> &args) {
3750  TMBAD_ASSERT(false);
3751  }
3752  const char *op_name();
3753 };
3754 Scalar CondExpLe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3755  const Scalar &x3);
3756 ad_plain CondExpLe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3757  const ad_plain &x3);
3758 ad_aug CondExpLe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3759  const ad_aug &x3);
3760 
3761 template <class Info>
3762 struct InfoOp : global::DynamicOperator<-1, 0> {
3763  Index n;
3764  Info info;
3765  InfoOp(Index n, Info info) : n(n), info(info) {}
3766  static const bool elimination_protected = true;
3767  static const bool add_forward_replay_copy = true;
3768  static const bool have_input_size_output_size = true;
3769  template <class Type>
3770  void forward(ForwardArgs<Type> &args) {}
3771  template <class Type>
3772  void reverse(ReverseArgs<Type> &args) {}
3773  Index input_size() const { return n; }
3774  Index output_size() const { return 0; }
3775  const char *op_name() { return "InfoOp"; }
3776  void print(global::print_config cfg) {
3777  Rcout << cfg.prefix << info << std::endl;
3778  }
3779  void *operator_data() { return &info; }
3780 };
3781 template <class Info>
3782 void addInfo(const std::vector<ad_aug> &x, const Info &info) {
3783  global::Complete<InfoOp<Info> >(x.size(), info)(x);
3784 }
/** \brief `double` overload of `addInfo`: does nothing. */
template <class Info>
void addInfo(const std::vector<double> &, const Info &) {
}
3787 
3788 struct SumOp : global::DynamicOperator<-1, 1> {
3789  static const bool is_linear = true;
3790  static const bool have_input_size_output_size = true;
3791  static const bool add_forward_replay_copy = true;
3792  size_t n;
3793  Index input_size() const;
3794  Index output_size() const;
3795  SumOp(size_t n);
3796  template <class Type>
3797  void forward(ForwardArgs<Type> &args) {
3798  args.y(0) = 0;
3799  for (size_t i = 0; i < n; i++) {
3800  args.y(0) += args.x(i);
3801  }
3802  }
3803  template <class Type>
3804  void reverse(ReverseArgs<Type> &args) {
3805  for (size_t i = 0; i < n; i++) {
3806  args.dx(i) += args.dy(0);
3807  }
3808  }
3809  const char *op_name();
3810 };
3811 template <class T>
3812 T sum(const std::vector<T> &x) {
3813  return global::Complete<SumOp>(x.size())(x)[0];
3814 }
3815 
3816 ad_plain logspace_sum(const std::vector<ad_plain> &x);
3817 struct LogSpaceSumOp : global::DynamicOperator<-1, 1> {
3818  size_t n;
3819  static const bool have_input_size_output_size = true;
3820  Index input_size() const;
3821  Index output_size() const;
3822  LogSpaceSumOp(size_t n);
3823  void forward(ForwardArgs<Scalar> &args);
3824  void forward(ForwardArgs<Replay> &args);
3825  template <class Type>
3826  void reverse(ReverseArgs<Type> &args) {
3827  for (size_t i = 0; i < n; i++) {
3828  args.dx(i) += exp(args.x(i) - args.y(0)) * args.dy(0);
3829  }
3830  }
3831  const char *op_name();
3832 };
3833 ad_plain logspace_sum(const std::vector<ad_plain> &x);
3834 template <class T>
3835 T logspace_sum(const std::vector<T> &x_) {
3836  std::vector<ad_plain> x(x_.begin(), x_.end());
3837  return logspace_sum(x);
3838 }
3839 
3840 ad_plain logspace_sum_stride(const std::vector<ad_plain> &x,
3841  const std::vector<Index> &stride, size_t n);
3842 struct LogSpaceSumStrideOp : global::DynamicOperator<-1, 1> {
3843  std::vector<Index> stride;
3844  size_t n;
3845  static const bool have_input_size_output_size = true;
3846 
3847  Index number_of_terms() const;
3848  template <class Type>
3849  Type &entry(Type **px, size_t i, size_t j) const {
3850  return px[j][0 + i * stride[j]];
3851  }
3852  template <class Type>
3853  Type rowsum(Type **px, size_t i) const {
3854  size_t m = stride.size();
3855  Type s = (Scalar)(0);
3856  for (size_t j = 0; j < m; j++) {
3857  s += entry(px, i, j);
3858  }
3859  return s;
3860  }
3861  Index input_size() const;
3862  Index output_size() const;
3863  LogSpaceSumStrideOp(std::vector<Index> stride, size_t n);
3864  void forward(ForwardArgs<Scalar> &args);
3865  void forward(ForwardArgs<Replay> &args);
3866  template <class Type>
3867  void reverse(ReverseArgs<Type> &args) {
3868  size_t m = stride.size();
3869  std::vector<Type *> wrk1(m);
3870  std::vector<Type *> wrk2(m);
3871  Type **px = &(wrk1[0]);
3872  Type **pdx = &(wrk2[0]);
3873  for (size_t i = 0; i < m; i++) {
3874  px[i] = args.x_ptr(i);
3875  pdx[i] = args.dx_ptr(i);
3876  }
3877  for (size_t i = 0; i < n; i++) {
3878  Type s = rowsum(px, i);
3879  Type tmp = exp(s - args.y(0)) * args.dy(0);
3880  for (size_t j = 0; j < m; j++) {
3881  entry(pdx, i, j) += tmp;
3882  }
3883  }
3884  }
3889  void dependencies(Args<> &args, Dependencies &dep) const;
3891  static const bool have_dependencies = true;
3893  static const bool implicit_dependencies = true;
3895  static const bool allow_remap = false;
3896  const char *op_name();
3897 
3898  void forward(ForwardArgs<Writer> &args);
3899  void reverse(ReverseArgs<Writer> &args);
3900 };
3901 ad_plain logspace_sum_stride(const std::vector<ad_plain> &x,
3902  const std::vector<Index> &stride, size_t n);
3903 template <class T>
3904 T logspace_sum_stride(const std::vector<T> &x_,
3905  const std::vector<Index> &stride, size_t n) {
3906  std::vector<ad_plain> x(x_.begin(), x_.end());
3907  return logspace_sum_stride(x, stride, n);
3908 }
3909 } // namespace TMBad
3910 #endif // HAVE_GLOBAL_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+
1 #ifndef HAVE_GLOBAL_HPP
2 #define HAVE_GLOBAL_HPP
3 // Autogenerated - do not edit by hand !
4 #include <algorithm>
5 #include <cmath>
6 #include <ctime>
7 #include <iomanip>
8 #include <iostream>
9 #include <limits>
10 #include <set>
11 #include <sstream>
12 #include <valarray>
13 #include <vector>
14 #include "config.hpp"
15 #include "radix.hpp"
16 
20 namespace TMBad {
21 
// Basic type aliases used throughout the namespace. The TMBAD_* names are
// macros defined outside this listing (presumably config.hpp - TODO confirm).
// Type of hash codes
22 typedef TMBAD_HASH_TYPE hash_t;
// Type used to index variables and operators
23 typedef TMBAD_INDEX_TYPE Index;
// Floating point type of tape values
24 typedef TMBAD_SCALAR_TYPE Scalar;
// Pair of indices (used as an input/output pointer pair by Args below)
25 typedef std::pair<Index, Index> IndexPair;
// Container type holding operator inputs
26 typedef TMBAD_INDEX_VECTOR IndexVector;
27 
28 struct global;
31 global *get_glob();
32 
/** \brief Stream a vector as `{a, b, c}`; an empty vector prints `{}`. */
template <class T>
std::ostream &operator<<(std::ostream &out, const std::vector<T> &v) {
  out << "{";
  for (size_t k = 0; k < v.size(); ++k) {
    // Every element except the last is followed by a separator
    out << v[k];
    if (k + 1 != v.size()) out << ", ";
  }
  out << "}";
  return out;
}
44 
46 template <class T>
47 struct intervals {
48  struct ep : std::pair<T, bool> {
49  bool left() const { return !this->second; }
50  ep(T x, bool type) : std::pair<T, bool>(x, type) {}
51  operator T() { return this->first; }
52  };
53  std::set<ep> x;
54  typedef typename std::set<ep>::iterator iterator;
58  bool insert(T a, T b) {
59  ep x1(a, false);
60  ep x2(b, true);
61  iterator it1 = x.upper_bound(x1);
62  iterator it2 = x.lower_bound(x2);
63 
64  bool insert_x1 = (it1 == x.end()) || it1->left();
65  bool insert_x2 = (it2 == x.end()) || it2->left();
66 
67  bool change = (it1 != it2) || insert_x1;
68 
69  if (it1 != it2) {
70  x.erase(it1, it2);
71  }
72 
73  if (insert_x1) x.insert(x1);
74  if (insert_x2) x.insert(x2);
75  return change;
76  }
78  template <class F>
79  F &apply(F &f) const {
80  for (iterator it = x.begin(); it != x.end();) {
81  ep a = *it;
82  ++it;
83  ep b = *it;
84  ++it;
85  f(a, b);
86  }
87  return f;
88  }
89  struct print_interval {
90  void operator()(T a, T b) { Rcout << "[ " << a << " , " << b << " ] "; }
91  };
92  void print() {
93  print_interval f;
94  this->apply(f);
95  Rcout << "\n";
96  }
97 };
98 
99 struct Dependencies : std::vector<Index> {
100  typedef std::vector<Index> Base;
101  std::vector<std::pair<Index, Index> > I;
102  Dependencies();
103  void clear();
104  void add_interval(Index a, Index b);
105  void add_segment(Index start, Index size);
106 
107  void monotone_transform_inplace(const std::vector<Index> &x);
108 
109  template <class F>
110  F &apply(F &f) {
111  for (size_t i = 0; i < this->size(); i++) f((*this)[i]);
112  for (size_t i = 0; i < I.size(); i++) {
113  for (Index j = I[i].first; j <= I[i].second; j++) {
114  f(j);
115  }
116  }
117  return f;
118  }
119 
120  template <class F>
121  F &apply_if_not_visited(F &f, intervals<Index> &visited) {
122  for (size_t i = 0; i < this->size(); i++) f((*this)[i]);
123  for (size_t i = 0; i < I.size(); i++) {
124  if (visited.insert(I[i].first, I[i].second)) {
125  for (Index j = I[i].first; j <= I[i].second; j++) {
126  f(j);
127  }
128  }
129  }
130  return f;
131  }
132 
133  bool any(const std::vector<bool> &x) const;
134 };
135 
138 enum ArrayAccess { x_read, y_read, y_write, dx_read, dx_write, dy_read };
139 template <class Args, ArrayAccess What>
140 struct Accessor {};
141 template <class Args>
142 struct Accessor<Args, x_read> {
143  typename Args::value_type operator()(const Args &args, Index j) const {
144  return args.x(j);
145  }
146 };
147 template <class Args>
148 struct Accessor<Args, y_read> {
149  typename Args::value_type operator()(const Args &args, Index j) const {
150  return args.y(j);
151  }
152 };
153 template <class Args>
154 struct Accessor<Args, y_write> {
155  typename Args::value_type &operator()(Args &args, Index j) {
156  return args.y(j);
157  }
158 };
159 template <class Args>
160 struct Accessor<Args, dx_read> {
161  typename Args::value_type operator()(const Args &args, Index j) const {
162  return args.dx(j);
163  }
164 };
165 template <class Args>
166 struct Accessor<Args, dx_write> {
167  typename Args::value_type &operator()(Args &args, Index j) {
168  return args.dx(j);
169  }
170 };
171 template <class Args>
172 struct Accessor<Args, dy_read> {
173  typename Args::value_type operator()(const Args &args, Index j) const {
174  return args.dy(j);
175  }
176 };
177 
183 template <class T>
185  const std::vector<T> &x;
186  const std::vector<Index> &i;
187  IndirectAccessor(const std::vector<T> &x, const std::vector<Index> &i)
188  : x(x), i(i) {}
189  T operator[](size_t j) const { return x[i[j]]; }
190  size_t size() const { return i.size(); }
191  operator std::vector<T>() const {
192  std::vector<T> ans(i.size());
193  for (size_t j = 0; j < ans.size(); j++) ans[j] = (*this)[j];
194  return ans;
195  }
196 };
197 
205 template <class Args, ArrayAccess What>
206 struct segment_ref {
207  typedef typename Args::value_type Type;
208  Accessor<Args, What> element_access;
209  Args args;
210  Index from, n;
211  segment_ref(const Args &args, Index from, Index n)
212  : args(args), from(from), n(n) {}
213  template <class Other>
214  operator Other() {
215  Other ans(n);
216  for (size_t i = 0; i < n; i++) {
217  ans[i] = element_access(args, from + i);
218  }
219  return ans;
220  }
221  Type operator[](Index i) const { return element_access(args, from + i); }
222  size_t size() const { return n; }
223  template <class Other>
224  segment_ref &operator=(const Other &other) {
225  for (size_t i = 0; i < n; i++) {
226  element_access(args, from + i) = other[i];
227  }
228  return *this;
229  }
230  template <class Other>
231  segment_ref &operator+=(const Other &other) {
232  for (size_t i = 0; i < n; i++) {
233  element_access(args, from + i) += other[i];
234  }
235  return *this;
236  }
237  template <class Other>
238  segment_ref &operator-=(const Other &other) {
239  for (size_t i = 0; i < n; i++) {
240  element_access(args, from + i) -= other[i];
241  }
242  return *this;
243  }
244 };
245 
255 template <class dummy = void>
256 struct Args {
258  const Index *inputs;
263  IndexPair ptr;
265  Index input(Index j) const { return inputs[ptr.first + j]; }
267  Index output(Index j) const { return ptr.second + j; }
268  Args(const IndexVector &inputs) : inputs(inputs.data()) {
269  ptr.first = 0;
270  ptr.second = 0;
271  }
272 };
278 template <class Type>
279 struct ForwardArgs : Args<> {
280  typedef std::vector<Type> TypeVector;
281  typedef Type value_type;
282  Type *values;
283  global *glob_ptr;
285  Type x(Index j) const { return values[input(j)]; }
287  Type &y(Index j) { return values[output(j)]; }
289  Type *x_ptr(Index j) { return &values[input(j)]; }
291  Type *y_ptr(Index j) { return &values[output(j)]; }
293  segment_ref<ForwardArgs, x_read> x_segment(Index from, Index size) {
294  return segment_ref<ForwardArgs, x_read>(*this, from, size);
295  }
298  return segment_ref<ForwardArgs, y_write>(*this, from, size);
299  }
300  ForwardArgs(const IndexVector &inputs, TypeVector &values,
301  global *glob_ptr = NULL)
302  : Args<>(inputs), values(values.data()), glob_ptr(glob_ptr) {}
303 };
310 template <class Type>
311 struct ReverseArgs : Args<> {
312  typedef std::vector<Type> TypeVector;
313  typedef Type value_type;
314  Type *values;
315  Type *derivs;
316  global *glob_ptr;
318  Type x(Index j) const { return values[input(j)]; }
320  Type y(Index j) const { return values[output(j)]; }
323  Type &dx(Index j) { return derivs[input(j)]; }
326  Type dy(Index j) const { return derivs[output(j)]; }
328  Type *x_ptr(Index j) { return &values[input(j)]; }
330  Type *y_ptr(Index j) { return &values[output(j)]; }
332  Type *dx_ptr(Index j) { return &derivs[input(j)]; }
334  Type *dy_ptr(Index j) { return &derivs[output(j)]; }
336  segment_ref<ReverseArgs, x_read> x_segment(Index from, Index size) {
337  return segment_ref<ReverseArgs, x_read>(*this, from, size);
338  }
340  segment_ref<ReverseArgs, y_read> y_segment(Index from, Index size) {
341  return segment_ref<ReverseArgs, y_read>(*this, from, size);
342  }
345  return segment_ref<ReverseArgs, dx_write>(*this, from, size);
346  }
349  return segment_ref<ReverseArgs, dy_read>(*this, from, size);
350  }
351  ReverseArgs(const IndexVector &inputs, TypeVector &values, TypeVector &derivs,
352  global *glob_ptr = NULL)
353  : Args<>(inputs),
354  values(values.data()),
355  derivs(derivs.data()),
356  glob_ptr(glob_ptr) {
357  ptr.first = (Index)inputs.size();
358  ptr.second = (Index)values.size();
359  }
360 };
361 
362 template <>
363 struct ForwardArgs<bool> : Args<> {
364  typedef std::vector<bool> BoolVector;
365  BoolVector &values;
366  intervals<Index> &marked_intervals;
367  bool x(Index j) { return values[input(j)]; }
368  BoolVector::reference y(Index j) { return values[output(j)]; }
369  ForwardArgs(const IndexVector &inputs, BoolVector &values,
370  intervals<Index> &marked_intervals)
371  : Args<>(inputs), values(values), marked_intervals(marked_intervals) {}
373  template <class Operator>
374  bool any_marked_input(const Operator &op) {
375  if (Operator::implicit_dependencies) {
376  Dependencies dep;
377  op.dependencies(*this, dep);
378  return dep.any(values);
379  } else {
380  Index ninput = op.input_size();
381  for (Index j = 0; j < ninput; j++)
382  if (x(j)) return true;
383  }
384  return false;
385  }
387  template <class Operator>
388  void mark_all_output(const Operator &op) {
389  if (Operator::updating && op.output_size() == 0) {
390  Dependencies dep;
391  op.dependencies_updating(*this, dep);
392 
393  for (size_t i = 0; i < dep.size(); i++) values[dep[i]] = true;
394 
395  for (size_t i = 0; i < dep.I.size(); i++) {
396  Index a = dep.I[i].first;
397  Index b = dep.I[i].second;
398  bool insert = marked_intervals.insert(a, b);
399  if (insert) {
400  for (Index j = a; j <= b; j++) {
401  values[j] = true;
402  }
403  }
404  }
405  } else {
406  Index noutput = op.output_size();
407  for (Index j = 0; j < noutput; j++) y(j) = true;
408  }
409  }
411  template <class Operator>
412  bool mark_dense(const Operator &op) {
413  if (any_marked_input(op)) {
414  mark_all_output(op);
415  return true;
416  }
417  return false;
418  }
419 };
420 
421 template <>
422 struct ReverseArgs<bool> : Args<> {
423  typedef std::vector<bool> BoolVector;
424  BoolVector &values;
425  intervals<Index> &marked_intervals;
426  BoolVector::reference x(Index j) { return values[input(j)]; }
427  bool y(Index j) { return values[output(j)]; }
428  ReverseArgs(IndexVector &inputs, BoolVector &values,
429  intervals<Index> &marked_intervals)
430  : Args<>(inputs), values(values), marked_intervals(marked_intervals) {
431  ptr.first = (Index)inputs.size();
432  ptr.second = (Index)values.size();
433  }
435  template <class Operator>
436  bool any_marked_output(const Operator &op) {
437  if (Operator::elimination_protected) return true;
438  if (Operator::updating && op.output_size() == 0) {
439  Dependencies dep;
440  op.dependencies_updating(*this, dep);
441  return dep.any(values);
442  } else {
443  Index noutput = op.output_size();
444  for (Index j = 0; j < noutput; j++)
445  if (y(j)) return true;
446  }
447  return false;
448  }
450  template <class Operator>
451  void mark_all_input(const Operator &op) {
452  if (Operator::implicit_dependencies) {
453  Dependencies dep;
454  op.dependencies(*this, dep);
455 
456  for (size_t i = 0; i < dep.size(); i++) values[dep[i]] = true;
457 
458  for (size_t i = 0; i < dep.I.size(); i++) {
459  Index a = dep.I[i].first;
460  Index b = dep.I[i].second;
461  bool insert = marked_intervals.insert(a, b);
462  if (insert) {
463  for (Index j = a; j <= b; j++) {
464  values[j] = true;
465  }
466  }
467  }
468  } else {
469  Index ninput = op.input_size();
470  for (Index j = 0; j < ninput; j++) x(j) = true;
471  }
472  }
474  template <class Operator>
475  bool mark_dense(const Operator &op) {
476  if (any_marked_output(op)) {
477  mark_all_input(op);
478  return true;
479  }
480  return false;
481  }
482 };
483 
484 std::string tostr(const Index &x);
485 
486 std::string tostr(const Scalar &x);
487 
488 struct Writer : std::string {
489  static std::ostream *cout;
490  Writer(std::string str);
491  Writer(Scalar x);
492  Writer();
493 
494  template <class V>
495  std::string vinit(const V &x) {
496  std::string y = "{";
497  for (size_t i = 0; i < x.size(); i++)
498  y = y + (i == 0 ? "" : ",") + tostr(x[i]);
499  y = y + "}";
500  return y;
501  }
502 
503  std::string p(std::string x);
504  Writer operator+(const Writer &other);
505  Writer operator-(const Writer &other);
506  Writer operator-();
507  Writer operator*(const Writer &other);
508  Writer operator/(const Writer &other);
509 
510  Writer operator*(const Scalar &other);
511  Writer operator+(const Scalar &other);
512 
513  void operator=(const Writer &other);
514  void operator+=(const Writer &other);
515  void operator-=(const Writer &other);
516  void operator*=(const Writer &other);
517  void operator/=(const Writer &other);
518 
519  template <class T>
520  friend Writer &operator<<(Writer &w, const T &v) {
521  *cout << v;
522  return w;
523  }
524  template <class T>
525  friend Writer &operator<<(Writer &w, const std::valarray<T> &x) {
526  *cout << w.vinit(x);
527  return w;
528  }
529 };
530 
531 template <>
532 struct ForwardArgs<Writer> : ForwardArgs<Scalar> {
533  typedef std::vector<Scalar> ScalarVector;
534  typedef ForwardArgs<Scalar> Base;
536  bool const_literals;
538  bool indirect;
539  void set_indirect() {
540  indirect = true;
541  ptr.first = 0;
542  ptr.second = 0;
543  }
544  Writer xd(Index j) { return "v[" + tostr(input(j)) + "]"; }
545  Writer yd(Index j) { return "v[" + tostr(output(j)) + "]"; }
546  Writer xi(Index j) { return "v[i[" + tostr(Index(ptr.first + j)) + "]]"; }
547  Writer yi(Index j) { return "v[o[" + tostr(Index(ptr.second + j)) + "]]"; }
548  Writer x(Index j) { return (indirect ? xi(j) : xd(j)); }
549  Writer y(Index j) { return (indirect ? yi(j) : yd(j)); }
550  Writer y_const(Index j) {
551  TMBAD_ASSERT2(!indirect, "Attempt to write constants within loop?");
552  return tostr(Base::y(j));
553  }
554  ForwardArgs(IndexVector &inputs, ScalarVector &values)
555  : ForwardArgs<Scalar>(inputs, values) {
556  const_literals = false;
557  indirect = false;
558  }
559 };
560 
561 template <>
562 struct ReverseArgs<Writer> : Args<> {
563  typedef std::vector<Scalar> ScalarVector;
565  bool const_literals;
567  bool indirect;
568  void set_indirect() {
569  indirect = true;
570  ptr.first = 0;
571  ptr.second = 0;
572  }
573  Writer dxd(Index j) { return "d[" + tostr(input(j)) + "]"; }
574  Writer dyd(Index j) { return "d[" + tostr(output(j)) + "]"; }
575  Writer xd(Index j) { return "v[" + tostr(input(j)) + "]"; }
576  Writer yd(Index j) { return "v[" + tostr(output(j)) + "]"; }
577  Writer dxi(Index j) { return "d[i[" + tostr(Index(ptr.first + j)) + "]]"; }
578  Writer dyi(Index j) { return "d[o[" + tostr(Index(ptr.second + j)) + "]]"; }
579  Writer xi(Index j) { return "v[i[" + tostr(Index(ptr.first + j)) + "]]"; }
580  Writer yi(Index j) { return "v[o[" + tostr(Index(ptr.second + j)) + "]]"; }
581  Writer x(Index j) { return (indirect ? xi(j) : xd(j)); }
582  Writer y(Index j) { return (indirect ? yi(j) : yd(j)); }
583  Writer dx(Index j) { return (indirect ? dxi(j) : dxd(j)); }
584  Writer dy(Index j) { return (indirect ? dyi(j) : dyd(j)); }
585 
586  ReverseArgs(IndexVector &inputs, ScalarVector &values) : Args<>(inputs) {
587  const_literals = false;
588  indirect = false;
589  ptr.first = (Index)inputs.size();
590  ptr.second = (Index)values.size();
591  }
592 };
593 
594 struct Position {
595  Position(Index node, Index first, Index second);
596  Position();
597  Index node;
598  IndexPair ptr;
599  bool operator<(const Position &other) const;
600 };
601 
/** \brief Sort `x` in ascending order, in place. */
template <class T>
void sort_inplace(std::vector<T> &x) {
  typename std::vector<T>::iterator first = x.begin();
  typename std::vector<T>::iterator last = x.end();
  std::sort(first, last);
}
607 
/** \brief Sort `x` in ascending order and drop duplicates, in place. */
template <class T>
void sort_unique_inplace(std::vector<T> &x) {
  std::sort(x.begin(), x.end());
  // std::unique moves the distinct elements to the front; erase the rest
  x.erase(std::unique(x.begin(), x.end()), x.end());
}
615 
617 struct graph {
618  std::vector<Index> j;
619  std::vector<Index> p;
620  graph();
621  size_t num_neighbors(Index node);
622  Index *neighbors(Index node);
623  bool empty();
624  size_t num_nodes();
625  void print();
628  std::vector<bool> mark;
630  std::vector<Index> inv2op;
632  std::vector<Index> dep2op;
634  std::vector<Index> rowcounts();
636  std::vector<Index> colcounts();
646  void bfs(const std::vector<Index> &start, std::vector<bool> &visited,
647  std::vector<Index> &result);
660  void search(std::vector<Index> &start, bool sort_input = true,
661  bool sort_output = true);
669  void search(std::vector<Index> &start, std::vector<bool> &visited,
670  bool sort_input = true, bool sort_output = true);
676  std::vector<Index> boundary(const std::vector<Index> &subgraph);
681  graph(size_t num_nodes, const std::vector<IndexPair> &edges);
682 };
683 
namespace {
/** \brief Factory functor returning a heap allocated `CompleteOperator`.
    The `dynamic` flag selects between a shared instance and a fresh
    allocation per call; see the two specializations. */
template <class CompleteOperator, bool dynamic>
struct constructOperator {};

/** \brief Static case: the operator is allocated on first use and the
    same pointer is handed out on every call. */
template <class CompleteOperator>
struct constructOperator<CompleteOperator, false> {
  CompleteOperator *operator()() {
    static CompleteOperator *shared_instance = new CompleteOperator();
    return shared_instance;
  }
};

/** \brief Dynamic case: every call allocates a new operator, forwarding
    up to four constructor arguments. */
template <class CompleteOperator>
struct constructOperator<CompleteOperator, true> {
  CompleteOperator *operator()() { return new CompleteOperator(); }

  template <class T1>
  CompleteOperator *operator()(const T1 &x1) {
    return new CompleteOperator(x1);
  }

  template <class T1, class T2>
  CompleteOperator *operator()(const T1 &x1, const T2 &x2) {
    return new CompleteOperator(x1, x2);
  }

  template <class T1, class T2, class T3>
  CompleteOperator *operator()(const T1 &x1, const T2 &x2, const T3 &x3) {
    return new CompleteOperator(x1, x2, x3);
  }

  template <class T1, class T2, class T3, class T4>
  CompleteOperator *operator()(const T1 &x1, const T2 &x2, const T3 &x3,
                               const T4 &x4) {
    return new CompleteOperator(x1, x2, x3, x4);
  }
};
}  // namespace
727 
/** \brief Compact, integer coded summary of an operator's boolean
    properties: bit `k` of `code` is set when flag `k` of `op_flag`
    holds. */
struct op_info {
  /** \brief Integer type holding the flag bits */
  typedef int IntRep;
  /** \brief The flag bits */
  IntRep code;
  /** \brief Bit positions of the individual flags.
      NOTE(review): the enumerator lines were missing from the listing.
      Names are taken from `get_flags` below; their order follows the
      order used there and must be confirmed against the original
      header, since it fixes the bit layout. */
  enum op_flag {
    dynamic,
    smart_pointer,
    is_linear,
    is_constant,
    independent_variable,
    dependent_variable,
    allow_remap,
    elimination_protected,
    updating,
    /** \brief Number of flags */
    op_flag_count
  };
  /** \brief Pack the like-named members of `op` into an integer, one
      bit per flag. */
  template <class T>
  IntRep get_flags(T op) {
    return

        (op.dynamic * (1 << dynamic)) |
        (op.smart_pointer * (1 << smart_pointer)) |
        (op.is_linear * (1 << is_linear)) |
        (op.is_constant * (1 << is_constant)) |
        (op.independent_variable * (1 << independent_variable)) |
        (op.dependent_variable * (1 << dependent_variable)) |
        (op.allow_remap * (1 << allow_remap)) |
        (op.elimination_protected * (1 << elimination_protected)) |
        (op.updating * (1 << updating));
  }
  op_info();
  op_info(op_flag f);

  /** \brief Build the summary of operator `op` */
  template <class T>
  op_info(T op) : code(get_flags(op)) {}
  /** \brief Test a flag (defined elsewhere) */
  bool test(op_flag f) const;
  op_info &operator|=(const op_info &other);
  op_info &operator&=(const op_info &other);
};
784 
797 struct global {
798  struct ad_plain;
799  struct ad_aug;
800  typedef TMBAD_REPLAY_TYPE Replay;
801  struct ad_segment;
802  struct print_config;
811  struct OperatorPure {
814  virtual void increment(IndexPair &ptr) = 0;
817  virtual void decrement(IndexPair &ptr) = 0;
819  virtual void forward(ForwardArgs<Scalar> &args) = 0;
821  virtual void reverse(ReverseArgs<Scalar> &args) = 0;
823  virtual void forward_incr(ForwardArgs<Scalar> &args) = 0;
825  virtual void reverse_decr(ReverseArgs<Scalar> &args) = 0;
827  virtual Index input_size() = 0;
829  virtual Index output_size() = 0;
834  virtual void forward(ForwardArgs<bool> &args) = 0;
839  virtual void reverse(ReverseArgs<bool> &args) = 0;
841  virtual void forward_incr(ForwardArgs<bool> &args) = 0;
843  virtual void reverse_decr(ReverseArgs<bool> &args) = 0;
845  virtual void forward_incr_mark_dense(ForwardArgs<bool> &args) = 0;
859  virtual void dependencies(Args<> &args, Dependencies &dep) = 0;
863  virtual void dependencies_updating(Args<> &args, Dependencies &dep) = 0;
865  virtual void forward(ForwardArgs<Replay> &args) = 0;
867  virtual void reverse(ReverseArgs<Replay> &args) = 0;
869  virtual void forward_incr(ForwardArgs<Replay> &args) = 0;
871  virtual void reverse_decr(ReverseArgs<Replay> &args) = 0;
873  virtual void forward(ForwardArgs<Writer> &args) = 0;
875  virtual void reverse(ReverseArgs<Writer> &args) = 0;
877  virtual void forward_incr(ForwardArgs<Writer> &args) = 0;
879  virtual void reverse_decr(ReverseArgs<Writer> &args) = 0;
881  virtual const char *op_name() { return "NoName"; }
885  virtual OperatorPure *self_fuse() = 0;
889  virtual OperatorPure *other_fuse(OperatorPure *other) = 0;
891  virtual OperatorPure *copy() = 0;
893  virtual void deallocate() = 0;
895  virtual op_info info() = 0;
897  virtual void *operator_data() = 0;
902  virtual void *identifier() = 0;
904  virtual void print(print_config cfg) = 0;
907  virtual void *incomplete() = 0;
908  virtual ~OperatorPure() {}
909  };
910 
917  struct operation_stack : std::vector<OperatorPure *> {
918  typedef std::vector<OperatorPure *> Base;
922  operation_stack();
924  operation_stack(const operation_stack &other);
927  void push_back(OperatorPure *x);
929  operation_stack &operator=(const operation_stack &other);
930  ~operation_stack();
932  void clear();
933  void copy_from(const operation_stack &other);
934  };
935 
940  std::vector<Scalar> values;
943  std::vector<Scalar> derivs;
945  IndexVector inputs;
948  std::vector<Index> inv_index;
951  std::vector<Index> dep_index;
952 
953  mutable std::vector<IndexPair> subgraph_ptr;
954  std::vector<Index> subgraph_seq;
956  void (*forward_compiled)(Scalar *);
958  void (*reverse_compiled)(Scalar *, Scalar *);
959 
960  global();
963  void clear();
964 
980  void shrink_to_fit(double tol = .9);
981 
985  void clear_deriv(Position start = Position(0, 0, 0));
986 
988  Scalar &value_inv(Index i);
990  Scalar &deriv_inv(Index i);
992  Scalar &value_dep(Index i);
994  Scalar &deriv_dep(Index i);
996  Position begin();
998  Position end();
999 
1001  struct no_filter {
1002  CONSTEXPR bool operator[](size_t i) const;
1003  };
1009  template <class ForwardArgs, class NodeFilter>
1010  void forward_loop(ForwardArgs &args, size_t begin,
1011  const NodeFilter &node_filter) const {
1012  for (size_t i = begin; i < opstack.size(); i++) {
1013  if (node_filter[i])
1014  opstack[i]->forward_incr(args);
1015  else
1016  opstack[i]->increment(args.ptr);
1017  }
1018  }
1020  template <class ForwardArgs>
1021  void forward_loop(ForwardArgs &args, size_t begin = 0) const {
1022  forward_loop(args, begin, no_filter());
1023  }
1028  template <class ReverseArgs, class NodeFilter>
1029  void reverse_loop(ReverseArgs &args, size_t begin,
1030  const NodeFilter &node_filter) const {
1031  for (size_t i = opstack.size(); i > begin;) {
1032  i--;
1033  if (node_filter[i])
1034  opstack[i]->reverse_decr(args);
1035  else
1036  opstack[i]->decrement(args.ptr);
1037  }
1038  }
1040  template <class ReverseArgs>
1041  void reverse_loop(ReverseArgs &args, size_t begin = 0) const {
1042  reverse_loop(args, begin, no_filter());
1043  }
1045  template <class ForwardArgs>
1047  subgraph_cache_ptr();
1048  for (size_t j = 0; j < subgraph_seq.size(); j++) {
1049  Index i = subgraph_seq[j];
1050  args.ptr = subgraph_ptr[i];
1051  opstack[i]->forward(args);
1052  }
1053  }
1055  template <class ReverseArgs>
1057  subgraph_cache_ptr();
1058  for (size_t j = subgraph_seq.size(); j > 0;) {
1059  j--;
1060  Index i = subgraph_seq[j];
1061  args.ptr = subgraph_ptr[i];
1062  opstack[i]->reverse(args);
1063  }
1064  }
1075  template <class Vector>
1077  typename Vector::value_type value =
1078  typename Vector::value_type(0)) const {
1079  if (array.size() != values.size()) {
1080  array.resize(values.size());
1081  std::fill(array.begin(), array.end(), value);
1082  return;
1083  }
1084  subgraph_cache_ptr();
1085  for (size_t j = 0; j < subgraph_seq.size(); j++) {
1086  Index i = subgraph_seq[j];
1087  size_t noutput = opstack[i]->output_size();
1088  for (size_t k = 0; k < noutput; k++)
1089  array[subgraph_ptr[i].second + k] = value;
1090  }
1091  }
1092 
1097  void forward(Position start = Position(0, 0, 0));
1105  void reverse(Position start = Position(0, 0, 0));
1107  void forward_sub();
1109  void reverse_sub();
1110 
1112  void forward(std::vector<bool> &marks);
1114  void reverse(std::vector<bool> &marks);
1119  void forward_sub(std::vector<bool> &marks,
1120  const std::vector<bool> &node_filter = std::vector<bool>());
1125  void reverse_sub(std::vector<bool> &marks,
1126  const std::vector<bool> &node_filter = std::vector<bool>());
1135  void forward_dense(std::vector<bool> &marks);
1136 
1137  intervals<Index> updating_intervals() const;
1138 
1139  intervals<Index> updating_intervals_sub() const;
1140 
1141  struct replay {
1143  std::vector<Replay> values;
1146  std::vector<Replay> derivs;
1148  const global &orig;
1150  global &target;
1152  global *parent_glob;
1154  Replay &value_inv(Index i);
1156  Replay &deriv_inv(Index i);
1158  Replay &value_dep(Index i);
1160  Replay &deriv_dep(Index i);
1164  replay(const global &orig, global &target);
1173  void start();
1178  void stop();
1180  void add_updatable_derivs(const intervals<Index> &I);
1182  void clear_deriv();
1189  void forward(bool inv_tags = true, bool dep_tags = true,
1190  Position start = Position(0, 0, 0),
1191  const std::vector<bool> &node_filter = std::vector<bool>());
1199  void reverse(bool dep_tags = true, bool inv_tags = false,
1200  Position start = Position(0, 0, 0),
1201  const std::vector<bool> &node_filter = std::vector<bool>());
1203  void forward_sub();
1205  void reverse_sub();
1207  void clear_deriv_sub();
1208  };
1209 
1214  void forward_replay(bool inv_tags = true, bool dep_tags = true);
1215 
1221  void subgraph_cache_ptr() const;
1229  void set_subgraph(const std::vector<bool> &marks, bool append = false);
1231  void mark_subgraph(std::vector<bool> &marks);
1233  void unmark_subgraph(std::vector<bool> &marks);
1235  void subgraph_trivial();
1241  void clear_deriv_sub();
1274  global extract_sub(std::vector<Index> &var_remap, global new_glob = global());
1279  void extract_sub_inplace(std::vector<bool> marks);
1283  global extract_sub();
1284 
1293  std::vector<Index> var2op();
1299  std::vector<bool> var2op(const std::vector<bool> &values);
1301  std::vector<Index> op2var(const std::vector<Index> &seq);
1303  std::vector<bool> op2var(const std::vector<bool> &seq_mark);
1312  std::vector<Index> op2idx(const std::vector<Index> &var_subset,
1313  Index NA = (Index)-1);
1315  std::vector<bool> mark_space(size_t n, const std::vector<Index> ind);
1317  std::vector<bool> inv_marks();
1319  std::vector<bool> dep_marks();
1321  std::vector<bool> subgraph_marks();
1322 
1323  struct append_edges {
1324  size_t &i;
1325  const std::vector<bool> &keep_var;
1326  std::vector<Index> &var2op;
1327  std::vector<IndexPair> &edges;
1328 
1329  std::vector<bool> op_marks;
1330  size_t pos;
1331  append_edges(size_t &i, size_t num_nodes, const std::vector<bool> &keep_var,
1332  std::vector<Index> &var2op, std::vector<IndexPair> &edges);
1333  void operator()(Index dep_j);
1334 
1335  void start_iteration();
1336 
1337  void end_iteration();
1338  };
1347  graph build_graph(bool transpose, const std::vector<bool> &keep_var);
1351  graph forward_graph(std::vector<bool> keep_var = std::vector<bool>(0));
1355  graph reverse_graph(std::vector<bool> keep_var = std::vector<bool>(0));
1356 
1361  bool identical(const global &other) const;
1362 
1364  template <class T>
1365  void hash(hash_t &h, T x) const {
1366  static const size_t n =
1367  (sizeof(T) / sizeof(hash_t)) + (sizeof(T) % sizeof(hash_t) != 0);
1368  hash_t buffer[n];
1369  std::fill(buffer, buffer + n, 0);
1370  for (size_t i = 0; i < sizeof(x); i++)
1371  ((char *)buffer)[i] = ((char *)&x)[i];
1372  hash_t A = 54059;
1373  hash_t B = 76963;
1374  for (size_t i = 0; i < n; i++) h = (A * h) ^ (B * buffer[i]);
1375  }
1376 
1385  hash_t hash() const;
1386 
/** \brief Option bundle taken by value by `hash_sweep(hash_config)`. */
1388  struct hash_config {
  /* Boolean switch; its exact effect is defined by hash_sweep() (not shown
     here) */
1398  bool reduce;
  /* List of indices; presumably seeds for the independent variables -
     TODO confirm */
1402  std::vector<Index> inv_seed;
1403  };
1404 
1459  std::vector<hash_t> hash_sweep(hash_config cfg) const;
1461  std::vector<hash_t> hash_sweep(bool weak = true) const;
1462 
1476  void eliminate();
1477 
/** \brief Option bundle taken by value by `print(print_config)` and by the
    operators' own `print(print_config)` hooks. */
1479  struct print_config {
  /* Two strings used by the printing code (defaults are set by the
     constructor, defined elsewhere) */
1480  std::string prefix, mark;
  /* Integer depth setting; semantics defined by print() (not shown here) */
1481  int depth;
1482  print_config();
1483  };
1485  void print(print_config cfg);
1487  void print();
1488 
1490  template <int ninput_, int noutput_ = 1>
1491  struct Operator {
1493  static const bool dynamic = false;
1495  static const int ninput = ninput_;
1497  static const int noutput = noutput_;
1499  static const int independent_variable = false;
1501  static const int dependent_variable = false;
1503  static const bool have_input_size_output_size = false;
1505  static const bool have_increment_decrement = false;
1507  static const bool have_forward_reverse = true;
1509  static const bool have_forward_incr_reverse_decr = false;
1511  static const bool have_forward_mark_reverse_mark = false;
1513  static const bool have_dependencies = false;
1519  static const bool allow_remap = true;
1530  static const bool implicit_dependencies = false;
1532  static const bool add_static_identifier = false;
1535  static const bool add_forward_replay_copy = false;
1538  static const bool have_eval = false;
1540  static const int max_fuse_depth = 2;
1542  static const bool is_linear = false;
1544  static const bool is_constant = false;
1546  static const bool smart_pointer = false;
1548  static const bool elimination_protected = false;
1574  static const bool updating = false;
1577  void dependencies_updating(Args<> &args, Dependencies &dep) const {}
1580  return NULL;
1581  }
1583  void *operator_data() { return NULL; }
1585  void print(print_config cfg) {}
1586  };
/** \brief Operator base with compile-time input/output counts that overrides
    two of the `Operator` defaults: `dynamic` becomes true and
    `max_fuse_depth` becomes 0. */
1589  template <int ninput, int noutput>
1590  struct DynamicOperator : Operator<ninput, noutput> {
  /* Shadows Operator::dynamic (false) */
1592  static const bool dynamic = true;
  /* Shadows Operator::max_fuse_depth (2); Complete::SelfFuse returns NULL for
     depth 0 */
1594  static const int max_fuse_depth = 0;
1595  };
/** \brief Operator base with a compile-time input count and an output count
    stored per instance. The base is instantiated with `noutput_ = -1`. */
1598  template <int ninput>
1599  struct DynamicOutputOperator : Operator<ninput, -1> {
1601  static const bool dynamic = true;
1603  static const int max_fuse_depth = 0;
  /* Instance member that shadows the static Operator::noutput (-1 here) */
1604  Index noutput;
1605  };
/** \brief Operator base with a compile-time output count (default 1) and an
    input count stored per instance. The base is instantiated with
    `ninput_ = -1`. */
1606  template <int noutput = 1>
1607  struct DynamicInputOperator : Operator<-1, noutput> {
1609  static const bool dynamic = true;
1611  static const int max_fuse_depth = 0;
  /* Instance member that shadows the static Operator::ninput (-1 here) */
1612  Index ninput;
1613  };
/** \brief Operator base whose input and output counts are both stored per
    instance and exposed through `input_size()` / `output_size()`. */
1614  struct DynamicInputOutputOperator : Operator<-1, -1> {
1616  static const bool dynamic = true;
1618  static const int max_fuse_depth = 0;
  /* Run-time counts; set by the constructor (defined elsewhere) */
1619  Index ninput_, noutput_;
1620  DynamicInputOutputOperator(Index ninput, Index noutput);
1621  Index input_size() const;
1622  Index output_size() const;
  /* Announces that the two size accessors above are provided, so CPL does not
     need to add AddInputSizeOutputSize */
1623  static const bool have_input_size_output_size = true;
1624  };
/** \brief Dynamic operator base without reference counting
    (`smart_pointer = false`): `Complete::deallocate()` deletes such an
    operator directly.

    `have_input_size_output_size` is set although no size accessors are
    declared here. NOTE(review): derived operators are presumably required to
    define `input_size()` / `output_size()` themselves - confirm. */
1625  struct UniqueDynamicOperator : Operator<-1, -1> {
1627  static const bool dynamic = true;
1629  static const int max_fuse_depth = 0;
1631  static const bool smart_pointer = false;
1634  static const bool have_input_size_output_size = true;
1635  };
/** \brief Same as `UniqueDynamicOperator` but with `smart_pointer = true`:
    `Complete::deallocate()` then only decrements the reference count while
    it is above one and deletes the object otherwise. */
1636  struct SharedDynamicOperator : UniqueDynamicOperator {
1638  static const bool smart_pointer = true;
1639  };
1640 
1643  template <class OperatorBase>
1644  struct AddInputSizeOutputSize : OperatorBase {
1645  INHERIT_CTOR(AddInputSizeOutputSize, OperatorBase)
1646  Index input_size() const { return this->ninput; }
1647  Index output_size() const { return this->noutput; }
1648  static const bool have_input_size_output_size = true;
1649  };
1650 
1653  template <class OperatorBase>
1654  struct AddIncrementDecrement : OperatorBase {
1655  INHERIT_CTOR(AddIncrementDecrement, OperatorBase)
1656  void increment(IndexPair &ptr) {
1657  ptr.first += this->input_size();
1658  ptr.second += this->output_size();
1659  }
1660  void decrement(IndexPair &ptr) {
1661  ptr.first -= this->input_size();
1662  ptr.second -= this->output_size();
1663  }
1664  static const bool have_increment_decrement = true;
1665  };
1666 
1670  template <class OperatorBase>
1671  struct AddForwardReverse : OperatorBase {
1672  INHERIT_CTOR(AddForwardReverse, OperatorBase)
1673 
1674  template <class Type>
1675  void forward(ForwardArgs<Type> &args) {
1676  ForwardArgs<Type> args_cpy(args);
1677  OperatorBase::forward_incr(args_cpy);
1678  }
1679  template <class Type>
1680  void reverse(ReverseArgs<Type> &args) {
1681  ReverseArgs<Type> args_cpy(args);
1682  OperatorBase::increment(args_cpy.ptr);
1683  OperatorBase::reverse_decr(args_cpy);
1684  }
1685  static const bool have_forward_reverse = true;
1686  };
1687 
1691  template <class OperatorBase>
1692  struct AddForwardIncrReverseDecr : OperatorBase {
1693  INHERIT_CTOR(AddForwardIncrReverseDecr, OperatorBase)
1694 
1695  template <class Type>
1696  void forward_incr(ForwardArgs<Type> &args) {
1697  OperatorBase::forward(args);
1698  OperatorBase::increment(args.ptr);
1699  }
1700 
1701  template <class Type>
1702  void reverse_decr(ReverseArgs<Type> &args) {
1703  OperatorBase::decrement(args.ptr);
1704  OperatorBase::reverse(args);
1705  }
1706  static const bool have_forward_incr_reverse_decr = true;
1707  };
1708 
1711  template <class OperatorBase>
1712  struct AddForwardMarkReverseMark : OperatorBase {
1713  INHERIT_CTOR(AddForwardMarkReverseMark, OperatorBase)
1714 
1715  template <class Type>
1716  void forward(ForwardArgs<Type> &args) {
1717  OperatorBase::forward(args);
1718  }
1719  template <class Type>
1720  void reverse(ReverseArgs<Type> &args) {
1721  OperatorBase::reverse(args);
1722  }
1723 
1724  void forward(ForwardArgs<bool> &args) { args.mark_dense(*this); }
1725  void reverse(ReverseArgs<bool> &args) { args.mark_dense(*this); }
1726  static const bool have_forward_mark_reverse_mark = true;
1727  };
1728 
1731  template <class OperatorBase>
1732  struct AddDependencies : OperatorBase {
1733  INHERIT_CTOR(AddDependencies, OperatorBase)
1734  void dependencies(Args<> &args, Dependencies &dep) const {
1735  Index ninput_ = this->input_size();
1736  for (Index j = 0; j < ninput_; j++) dep.push_back(args.input(j));
1737  }
1738  static const bool have_dependencies = true;
1739  };
1740 
1743  template <class OperatorBase, int ninput>
1744  struct AddForwardFromEval : OperatorBase {};
1746  template <class OperatorBase>
1747  struct AddForwardFromEval<OperatorBase, 1> : OperatorBase {
1748  INHERIT_CTOR(AddForwardFromEval, OperatorBase)
1749  template <class Type>
1750  void forward(ForwardArgs<Type> &args) {
1751  args.y(0) = this->eval(args.x(0));
1752  }
1753  };
1755  template <class OperatorBase>
1756  struct AddForwardFromEval<OperatorBase, 2> : OperatorBase {
1757  INHERIT_CTOR(AddForwardFromEval, OperatorBase)
1758  template <class Type>
1759  void forward(ForwardArgs<Type> &args) {
1760  args.y(0) = this->eval(args.x(0), args.x(1));
1761  }
1762  };
1763 
1765  template <bool flag, class dummy>
1767  void increment() {}
1768  void decrement() {}
1769  size_t operator()() const { return 0; }
1770  };
1771  template <class dummy>
1772  struct ReferenceCounter<true, dummy> {
1773  size_t counter;
1774  ReferenceCounter() : counter(0) {}
1775  void increment() { counter++; }
1776  void decrement() { counter--; }
1777  size_t operator()() const { return counter; }
1778  };
1779 
/** \brief Compile-time type selector.

    `if_else<flag, Yes, No>::type` is `Yes` when `flag` is true and `No`
    otherwise. The primary template deliberately has no `type` member. */
template <bool flag, class Yes, class No>
struct if_else {};

/** \brief Selection when the flag is false */
template <class Then, class Otherwise>
struct if_else<false, Then, Otherwise> {
  typedef Otherwise type;
};

/** \brief Selection when the flag is true */
template <class Then, class Otherwise>
struct if_else<true, Then, Otherwise> {
  typedef Then type;
};
1791 
1793  template <class OperatorBase>
1794  struct CPL {
1795  static const bool test1 = !OperatorBase::have_eval;
1797  typedef typename if_else<
1798  test1, OperatorBase,
1800 
1801  static const bool test2 = Result1::have_input_size_output_size;
1803  typedef
1806 
1807  static const bool test3 = !Result2::have_dependencies;
1809  typedef typename if_else<test3, AddDependencies<Result2>, Result2>::type
1811 
1812  static const bool test4 = Result3::have_increment_decrement;
1814  typedef
1817 
1818  static const bool test5 = Result4::have_forward_mark_reverse_mark;
1820  typedef typename if_else<test5, Result4,
1822 
1823  static const bool test6 = Result5::have_forward_reverse &&
1824  !Result5::have_forward_incr_reverse_decr;
1827  Result5>::type Result6;
1828 
1829  static const bool test7 = Result6::have_forward_incr_reverse_decr &&
1830  !Result6::have_forward_reverse;
1832  typedef typename if_else<test7, AddForwardReverse<Result6>, Result6>::type
1834 
1835  typedef Result7 type;
1836  };
1837 
1839  template <class Operator1, class Operator2>
1840  struct Fused : Operator<Operator1::ninput + Operator2::ninput,
1841  Operator1::noutput + Operator2::noutput> {
1842  typename CPL<Operator1>::type Op1;
1843  typename CPL<Operator2>::type Op2;
1845  static const int independent_variable =
1846  Operator1::independent_variable && Operator2::independent_variable;
1848  static const int dependent_variable =
1849  Operator1::dependent_variable && Operator2::dependent_variable;
1851  static const int max_fuse_depth =
1852  (Operator1::max_fuse_depth < Operator2::max_fuse_depth
1853  ? Operator1::max_fuse_depth - 1
1854  : Operator2::max_fuse_depth - 1);
1856  static const bool is_linear = Operator1::is_linear && Operator2::is_linear;
1857  template <class Type>
1858  void forward_incr(ForwardArgs<Type> &args) {
1859  Op1.forward_incr(args);
1860  Op2.forward_incr(args);
1861  }
1862  template <class Type>
1863  void reverse_decr(ReverseArgs<Type> &args) {
1864  Op2.reverse_decr(args);
1865  Op1.reverse_decr(args);
1866  }
1868  static const bool have_forward_incr_reverse_decr = true;
1870  static const bool have_forward_reverse = false;
1871  const char *op_name() { return "Fused"; }
1872  };
1881  template <class Operator1>
1882  struct Rep : DynamicOperator<-1, -1> {
1883  typename CPL<Operator1>::type Op;
1885  static const int independent_variable = Operator1::independent_variable;
1887  static const int dependent_variable = Operator1::dependent_variable;
1889  static const bool is_linear = Operator1::is_linear;
1890  Index n;
1891  Rep(Index n) : n(n) {}
1892  Index input_size() const { return Operator1::ninput * n; }
1893  Index output_size() const { return Operator1::noutput * n; }
1895  static const bool have_input_size_output_size = true;
1896  template <class Type>
1897  void forward_incr(ForwardArgs<Type> &args) {
1898  for (size_t i = 0; i < (size_t)n; i++) Op.forward_incr(args);
1899  }
1900  template <class Type>
1901  void reverse_decr(ReverseArgs<Type> &args) {
1902  for (size_t i = 0; i < (size_t)n; i++) Op.reverse_decr(args);
1903  }
1905  static const bool have_forward_incr_reverse_decr = true;
1907  static const bool have_forward_reverse = false;
1914  TMBAD_ASSERT(false);
1915  std::vector<Index> &inputs = get_glob()->inputs;
1916  size_t k = Op.input_size();
1917  size_t start = inputs.size() - k * n;
1918  std::valarray<Index> increment(k);
1919  if (k > 0) {
1920  for (size_t i = 0; i < (size_t)n - 1; i++) {
1921  std::valarray<Index> v1(&inputs[start + i * k], k);
1922  std::valarray<Index> v2(&inputs[start + (i + 1) * k], k);
1923  if (i == 0) {
1924  increment = v2 - v1;
1925  } else {
1926  bool ok = (increment == (v2 - v1)).min();
1927  if (!ok) return NULL;
1928  }
1929  }
1930  }
1931 
1932  size_t reduction = (n - 1) * k;
1933  inputs.resize(inputs.size() - reduction);
1934  return get_glob()->getOperator<RepCompress<Operator1> >(n, increment);
1935  }
1936  OperatorPure *other_fuse(OperatorPure *self, OperatorPure *other) {
1937  OperatorPure *op1 = get_glob()->getOperator<Operator1>();
1938  if (op1 == other) {
1939  this->n++;
1940  return self;
1941  }
1942  return NULL;
1943  }
1944  const char *op_name() { return "Rep"; }
1945  };
1956  template <class Operator1>
1957  struct RepCompress : DynamicOperator<-1, -1> {
1959  static const int independent_variable = Operator1::independent_variable;
1961  static const int dependent_variable = Operator1::dependent_variable;
1963  static const bool is_linear = Operator1::is_linear;
1964  typename CPL<Operator1>::type Op;
1965  Index n;
1966 
1967  std::valarray<Index> increment_pattern;
1968  RepCompress(Index n, std::valarray<Index> v) : n(n), increment_pattern(v) {}
1969  Index input_size() const { return Operator1::ninput; }
1970  Index output_size() const { return Operator1::noutput * n; }
1972  static const bool have_input_size_output_size = true;
1974  template <class Type>
1976  std::valarray<Index> inputs(input_size());
1977  for (size_t i = 0; i < inputs.size(); i++) inputs[i] = args.input(i);
1978  ForwardArgs<Type> args_cpy = args;
1979  args_cpy.inputs = &inputs[0];
1980  args_cpy.ptr.first = 0;
1981  for (size_t i = 0; i < (size_t)n; i++) {
1982  Op.forward(args_cpy);
1983  inputs += this->increment_pattern;
1984  args_cpy.ptr.second += Op.output_size();
1985  }
1986  }
1988  template <class Type>
1990  std::valarray<Index> inputs(input_size());
1991  for (size_t i = 0; i < inputs.size(); i++) inputs[i] = args.input(i);
1992  inputs += n * this->increment_pattern;
1993  ReverseArgs<Type> args_cpy = args;
1994  args_cpy.inputs = &inputs[0];
1995  args_cpy.ptr.first = 0;
1996  args_cpy.ptr.second += n * Op.output_size();
1997  for (size_t i = 0; i < (size_t)n; i++) {
1998  inputs -= this->increment_pattern;
1999  args_cpy.ptr.second -= Op.output_size();
2000  Op.reverse(args_cpy);
2001  }
2002  }
2004  void dependencies(Args<> &args, Dependencies &dep) const {
2005  std::valarray<Index> inputs(input_size());
2006  for (size_t i = 0; i < inputs.size(); i++) inputs[i] = args.input(i);
2007  for (size_t i = 0; i < (size_t)n; i++) {
2008  dep.insert(dep.end(), &inputs[0], &inputs[0] + inputs.size());
2009  inputs += this->increment_pattern;
2010  }
2011  }
2012  static const bool have_dependencies = true;
/** \brief Source-code-writer overload of the forward pass.

    Streams a C-style `for` loop that runs `n` times. The loop header
    declares three index arrays: `i` (input indices of the first replicate),
    `di` (the increment pattern) and `o` (output indices of the first
    replicate). The loop body is the wrapped operator's own forward code,
    generated with indirect addressing, followed by the index updates. */
2013  void forward(ForwardArgs<Writer> &args) {
  /* Input indices of the first replicate */
2014  std::valarray<Index> inputs(Op.input_size());
2015  for (size_t i = 0; i < (size_t)Op.input_size(); i++)
2016  inputs[i] = args.input(i);
  /* Output indices of the first replicate */
2017  std::valarray<Index> outputs(Op.output_size());
2018  for (size_t i = 0; i < (size_t)Op.output_size(); i++)
2019  outputs[i] = args.output(i);
2020  Writer w;
2021  int ninp = Op.input_size();
2022  int nout = Op.output_size();
2023 
  /* Loop header: counter plus the three index arrays */
2024  w << "for (int count = 0, "
2025  << "i[" << ninp << "]=" << inputs << ", "
2026  << "di[" << ninp << "]=" << increment_pattern << ", "
2027  << "o[" << nout << "]=" << outputs << "; "
2028  << "count < " << n << "; count++) {\n";
2029 
  /* Loop body: the wrapped operator writes its forward code using a copy of
     args switched to indirect mode */
2030  w << " ";
2031  ForwardArgs<Writer> args_cpy = args;
2032  args_cpy.set_indirect();
2033  Op.forward(args_cpy);
2034  w << "\n";
2035 
  /* Generated code then advances inputs by the pattern and outputs by nout */
2036  w << " ";
2037  w << "for (int k=0; k<" << ninp << "; k++) i[k] += di[k];\n";
2038  w << " ";
2039  w << "for (int k=0; k<" << nout << "; k++) o[k] += " << nout << ";\n";
2040 
2041  w << " ";
2042  w << "}";
2043  }
/** \brief Source-code-writer overload of the reverse pass.

    Mirror image of the writer forward pass. The index arrays start one step
    past the last replicate (inputs shifted by `n * increment_pattern`,
    outputs by `n * Op.output_size()`). Each generated iteration first steps
    the indices back and then emits the wrapped operator's reverse code. */
2044  void reverse(ReverseArgs<Writer> &args) {
2045  std::valarray<Index> inputs(Op.input_size());
2046  for (size_t i = 0; i < (size_t)Op.input_size(); i++)
2047  inputs[i] = args.input(i);
  /* Move to one past the last replicate */
2048  inputs += n * increment_pattern;
2049  std::valarray<Index> outputs(Op.output_size());
2050  for (size_t i = 0; i < (size_t)Op.output_size(); i++)
2051  outputs[i] = args.output(i);
2052  outputs += n * Op.output_size();
2053  Writer w;
2054  int ninp = Op.input_size();
2055  int nout = Op.output_size();
2056 
  /* Loop header: counter plus the three index arrays */
2057  w << "for (int count = 0, "
2058  << "i[" << ninp << "]=" << inputs << ", "
2059  << "di[" << ninp << "]=" << increment_pattern << ", "
2060  << "o[" << nout << "]=" << outputs << "; "
2061  << "count < " << n << "; count++) {\n";
2062 
  /* Generated code steps the indices back before the operator body runs */
2063  w << " ";
2064  w << "for (int k=0; k<" << ninp << "; k++) i[k] -= di[k];\n";
2065  w << " ";
2066  w << "for (int k=0; k<" << nout << "; k++) o[k] -= " << nout << ";\n";
2067 
  /* Loop body: the wrapped operator writes its reverse code using a copy of
     args switched to indirect mode */
2068  w << " ";
2069  ReverseArgs<Writer> args_cpy = args;
2070  args_cpy.set_indirect();
2071  Op.reverse(args_cpy);
2072  w << "\n";
2073 
2074  w << " ";
2075  w << "}";
2076  }
2078  static const bool have_forward_incr_reverse_decr = false;
2080  static const bool have_forward_reverse = true;
2082  static const bool have_forward_mark_reverse_mark = true;
2083  const char *op_name() { return "CRep"; }
2084 
2085  struct operator_data_t {
2086  OperatorPure *Op;
2087  Index n;
2088  std::valarray<Index> ip;
2089  operator_data_t(const RepCompress &x)
2090  : Op(get_glob()->getOperator<Operator1>()),
2091  n(x.n),
2092  ip(x.increment_pattern) {}
2093  ~operator_data_t() { Op->deallocate(); }
2094  bool operator==(const operator_data_t &other) {
2095  return (Op == other.Op) && (ip.size() == other.ip.size()) &&
2096  ((ip - other.ip).min() == 0);
2097  }
2098  };
2099  void *operator_data() { return new operator_data_t(*this); }
2100  OperatorPure *other_fuse(OperatorPure *self, OperatorPure *other) {
2101  if (this->op_name() == other->op_name()) {
2102  operator_data_t *p1 =
2103  static_cast<operator_data_t *>(self->operator_data());
2104  operator_data_t *p2 =
2105  static_cast<operator_data_t *>(other->operator_data());
2106  bool match = (*p1 == *p2);
2107  int other_n = p2->n;
2108  delete p1;
2109  delete p2;
2110  if (match) {
2111  std::vector<Index> &inputs = get_glob()->inputs;
2112  size_t reduction = increment_pattern.size();
2113  inputs.resize(inputs.size() - reduction);
2114  this->n += other_n;
2115  other->deallocate();
2116  return self;
2117  }
2118  }
2119  return NULL;
2120  }
2121  };
2122 
2128  template <class OperatorBase>
2130  typename CPL<OperatorBase>::type Op;
2131  INHERIT_CTOR(Complete, Op)
2132  ~Complete() {}
2133  void forward(ForwardArgs<Scalar> &args) { Op.forward(args); }
2134  void reverse(ReverseArgs<Scalar> &args) { Op.reverse(args); }
2135  void forward_incr(ForwardArgs<Scalar> &args) { Op.forward_incr(args); }
2136  void reverse_decr(ReverseArgs<Scalar> &args) { Op.reverse_decr(args); }
2137 
2139  if (Op.add_forward_replay_copy)
2140  forward_replay_copy(args);
2141  else
2142  Op.forward(args);
2143  }
2144  void reverse(ReverseArgs<Replay> &args) { Op.reverse(args); }
2146  if (Op.add_forward_replay_copy) {
2147  forward_replay_copy(args);
2148  increment(args.ptr);
2149  } else
2150  Op.forward_incr(args);
2151  }
2152  void reverse_decr(ReverseArgs<Replay> &args) { Op.reverse_decr(args); }
2153 
2154  void forward(ForwardArgs<bool> &args) { Op.forward(args); }
2155  void reverse(ReverseArgs<bool> &args) { Op.reverse(args); }
2156  void forward_incr(ForwardArgs<bool> &args) { Op.forward_incr(args); }
2157  void reverse_decr(ReverseArgs<bool> &args) { Op.reverse_decr(args); }
2159  args.mark_dense(Op);
2160  Op.increment(args.ptr);
2161  };
2162 
2163  void forward(ForwardArgs<Writer> &args) { Op.forward(args); }
2164  void reverse(ReverseArgs<Writer> &args) { Op.reverse(args); }
2165  void forward_incr(ForwardArgs<Writer> &args) { Op.forward_incr(args); }
2166  void reverse_decr(ReverseArgs<Writer> &args) { Op.reverse_decr(args); }
2171  std::vector<ad_plain> operator()(const std::vector<ad_plain> &x) {
2172  TMBAD_ASSERT2(OperatorBase::dynamic,
2173  "Stack to heap copy only allowed for dynamic operators");
2174  Complete *pOp = new Complete(*this);
2175  TMBAD_ASSERT2(pOp->ref_count() == 0, "Operator already on the heap");
2176  pOp->ref_count.increment();
2177  return get_glob()->add_to_stack<OperatorBase>(pOp, x);
2178  }
2179  ad_segment operator()(const ad_segment &x) {
2180  TMBAD_ASSERT2(OperatorBase::dynamic,
2181  "Stack to heap copy only allowed for dynamic operators");
2182  Complete *pOp = new Complete(*this);
2183  TMBAD_ASSERT2(pOp->ref_count() == 0, "Operator already on the heap");
2184  pOp->ref_count.increment();
2185  return get_glob()->add_to_stack<OperatorBase>(pOp, x);
2186  }
2187  ad_segment operator()(const ad_segment &x, const ad_segment &y) {
2188  TMBAD_ASSERT2(OperatorBase::dynamic,
2189  "Stack to heap copy only allowed for dynamic operators");
2190  Complete *pOp = new Complete(*this);
2191  TMBAD_ASSERT2(pOp->ref_count() == 0, "Operator already on the heap");
2192  pOp->ref_count.increment();
2193  return get_glob()->add_to_stack<OperatorBase>(pOp, x, y);
2194  }
2195  template <class T>
2196  std::vector<T> operator()(const std::vector<T> &x) {
2197  std::vector<ad_plain> x_(x.begin(), x.end());
2198  std::vector<ad_plain> y_ = (*this)(x_);
2199  std::vector<T> y(y_.begin(), y_.end());
2200  return y;
2201  }
2202  void forward_replay_copy(ForwardArgs<Replay> &args) {
2203  std::vector<ad_plain> x(Op.input_size());
2204  for (size_t i = 0; i < x.size(); i++) x[i] = args.x(i);
2205  std::vector<ad_plain> y =
2206  get_glob()->add_to_stack<OperatorBase>(this->copy(), x);
2207  for (size_t i = 0; i < y.size(); i++) args.y(i) = y[i];
2208  }
2209  void dependencies(Args<> &args, Dependencies &dep) {
2210  Op.dependencies(args, dep);
2211  }
2212  void dependencies_updating(Args<> &args, Dependencies &dep) {
2213  Op.dependencies_updating(args, dep);
2214  }
2215  void increment(IndexPair &ptr) { Op.increment(ptr); }
2216  void decrement(IndexPair &ptr) { Op.decrement(ptr); }
2217  Index input_size() { return Op.input_size(); }
2218  Index output_size() { return Op.output_size(); }
2219  const char *op_name() { return Op.op_name(); }
2220  void print(print_config cfg) { Op.print(cfg); }
2221 
2222  template <class Operator_, int depth>
2223  struct SelfFuse {
2224  typedef Rep<Operator_> type;
2225  OperatorPure *operator()() {
2226  return get_glob()->template getOperator<type>(2);
2227  }
2228  };
2229  template <class Operator_>
2230  struct SelfFuse<Operator_, 0> {
2231  OperatorPure *operator()() { return NULL; }
2232  };
2234  return SelfFuse<OperatorBase, OperatorBase::max_fuse_depth>()();
2235  }
2237  return Op.other_fuse(this, other);
2238  }
2241  if (Op.smart_pointer) {
2242  ref_count.increment();
2243  return this;
2244  } else if (Op.dynamic)
2245  return new Complete(*this);
2246  else
2247  return this;
2248  }
2249  void deallocate() {
2250  if (!Op.dynamic) return;
2251  if (Op.smart_pointer) {
2252  if (ref_count() > 1) {
2253  ref_count.decrement();
2254  return;
2255  }
2256  }
2257  delete this;
2258  }
2260  op_info info(Op);
2261  return info;
2262  }
2263  void *identifier() {
2264  if (Op.add_static_identifier) {
2265  static void *id = new char();
2266  return id;
2267  } else
2268  return (void *)this;
2269  }
2270  void *operator_data() { return Op.operator_data(); }
2271  void *incomplete() { return &Op; }
2272  };
2273 
2274  template <class OperatorBase>
2275  Complete<OperatorBase> *getOperator() const {
2276  return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()();
2277  }
2278  template <class OperatorBase, class T1>
2279  Complete<OperatorBase> *getOperator(const T1 &x1) const {
2280  return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
2281  x1);
2282  }
2283  template <class OperatorBase, class T1, class T2>
2284  Complete<OperatorBase> *getOperator(const T1 &x1, const T2 &x2) const {
2285  return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
2286  x1, x2);
2287  }
2288  template <class OperatorBase, class T1, class T2, class T3>
2289  Complete<OperatorBase> *getOperator(const T1 &x1, const T2 &x2,
2290  const T3 &x3) const {
2291  return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
2292  x1, x2, x3);
2293  }
2294  template <class OperatorBase, class T1, class T2, class T3, class T4>
2295  Complete<OperatorBase> *getOperator(const T1 &x1, const T2 &x2, const T3 &x3,
2296  const T4 &x4) const {
2297  return constructOperator<Complete<OperatorBase>, OperatorBase::dynamic>()(
2298  x1, x2, x3, x4);
2299  }
2300  struct InvOp : Operator<0> {
2301  static const int independent_variable = true;
2302  template <class Type>
2303  void forward(ForwardArgs<Type> &args) {}
2304  template <class Type>
2305  void reverse(ReverseArgs<Type> &args) {}
2306  const char *op_name();
2307  };
2308 
2309  struct DepOp : Operator<1> {
2310  static const bool is_linear = true;
2311  static const int dependent_variable = true;
2312  static const bool have_eval = true;
2313  template <class Type>
2314  Type eval(Type x0) {
2315  return x0;
2316  }
2317  template <class Type>
2318  void reverse(ReverseArgs<Type> &args) {
2319  args.dx(0) += args.dy(0);
2320  }
2321  const char *op_name();
2322  };
2323 
2324  struct ConstOp : Operator<0, 1> {
2325  static const bool is_linear = true;
2326  static const bool is_constant = true;
2327  template <class Type>
2328  void forward(ForwardArgs<Type> &args) {}
2329  void forward(ForwardArgs<Replay> &args);
2330  template <class Type>
2331  void reverse(ReverseArgs<Type> &args) {}
2332  const char *op_name();
2333  void forward(ForwardArgs<Writer> &args);
2334  };
2335  struct DataOp : DynamicOutputOperator<0> {
2336  typedef DynamicOutputOperator<0> Base;
2337  static const bool is_linear = true;
2338  DataOp(Index n);
2339  template <class Type>
2340  void forward(ForwardArgs<Type> &args) {}
2341  template <class Type>
2342  void reverse(ReverseArgs<Type> &args) {}
2343  const char *op_name();
2344  void forward(ForwardArgs<Writer> &args);
2345  };
2356  static const bool add_forward_replay_copy = true;
2357  ZeroOp(Index n);
2358  template <class Type>
2359  void forward(ForwardArgs<Type> &args) {
2360  for (Index i = 0; i < Base::noutput; i++) args.y(i) = Type(0);
2361  }
2362  template <class Type>
2363  void reverse(ReverseArgs<Type> &args) {}
2364  const char *op_name();
2365  void forward(ForwardArgs<Writer> &args);
2368  void operator()(Replay *x, Index n);
2369  };
2371  struct NullOp : Operator<0, 0> {
2372  NullOp();
2373  const char *op_name();
2374  template <class T>
2375  void forward(ForwardArgs<T> &args) {}
2376  template <class T>
2377  void reverse(ReverseArgs<T> &args) {}
2378  };
2380  struct NullOp2 : DynamicInputOutputOperator {
2381  NullOp2(Index ninput, Index noutput);
2382  const char *op_name();
2383  template <class T>
2384  void forward(ForwardArgs<T> &args) {}
2385  template <class T>
2386  void reverse(ReverseArgs<T> &args) {}
2387  };
2408  struct RefOp : DynamicOperator<0, 1> {
2409  static const bool dynamic = true;
2410  global *glob;
2411  Index i;
2412  RefOp(global *glob, Index i);
2414  void forward(ForwardArgs<Scalar> &args);
2416  void forward(ForwardArgs<Replay> &args);
2419  template <class Type>
2421  TMBAD_ASSERT2(false,
2422  "Reverse mode updates are forbidden until all references "
2423  "are resolved");
2424  }
2426  void reverse(ReverseArgs<Replay> &args);
2427  const char *op_name();
2428  };
2429 
2430  typedef Operator<1> UnaryOperator;
2431  typedef Operator<2> BinaryOperator;
2432 
2433  OperatorPure *Fuse(OperatorPure *Op1, OperatorPure *Op2);
2434 
2435  static bool fuse;
2436 
2441  void set_fuse(bool flag);
2442 
2445  void add_to_opstack(OperatorPure *pOp);
2447  template <class OperatorBase>
2448  ad_plain add_to_stack(Scalar result = 0) {
2449  ad_plain ans;
2450  ans.index = this->values.size();
2451 
2452  this->values.push_back(result);
2453 
2454  Complete<OperatorBase> *pOp = this->template getOperator<OperatorBase>();
2455  add_to_opstack(pOp);
2456 
2457  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
2458  return ans;
2459  }
2461  template <class OperatorBase>
2462  ad_plain add_to_stack(const ad_plain &x) {
2463  ad_plain ans;
2464  ans.index = this->values.size();
2465 
2466  this->values.push_back(OperatorBase().eval(x.Value()));
2467 
2468  this->inputs.push_back(x.index);
2469 
2470  Complete<OperatorBase> *pOp = this->template getOperator<OperatorBase>();
2471  add_to_opstack(pOp);
2472 
2473  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
2474  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
2475  return ans;
2476  }
2478  template <class OperatorBase>
2479  ad_plain add_to_stack(const ad_plain &x, const ad_plain &y) {
2480  ad_plain ans;
2481  ans.index = this->values.size();
2482 
2483  this->values.push_back(OperatorBase().eval(x.Value(), y.Value()));
2484 
2485  this->inputs.push_back(x.index);
2486  this->inputs.push_back(y.index);
2487 
2488  Complete<OperatorBase> *pOp = this->template getOperator<OperatorBase>();
2489  add_to_opstack(pOp);
2490 
2491  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
2492  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
2493  return ans;
2494  }
2495  template <class OperatorBase>
2496  ad_segment add_to_stack(ad_segment lhs, ad_segment rhs,
2497  ad_segment more = ad_segment()) {
2498  IndexPair ptr((Index)inputs.size(), (Index)values.size());
2499  Complete<OperatorBase> *pOp =
2500  this->template getOperator<OperatorBase>(lhs, rhs);
2501  size_t n = pOp->output_size();
2502  ad_segment ans(values.size(), n);
2503  inputs.push_back(lhs.index());
2504  inputs.push_back(rhs.index());
2505  if (more.size() > 0) inputs.push_back(more.index());
2506  opstack.push_back(pOp);
2507  values.resize(values.size() + n);
2508  ForwardArgs<Scalar> args(inputs, values, this);
2509  args.ptr = ptr;
2510  pOp->forward(args);
2511 
2512  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
2513  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
2514  return ans;
2515  }
2516 
2517  template <class OperatorBase>
2518  ad_segment add_to_stack(Complete<OperatorBase> *pOp, ad_segment lhs,
2519  ad_segment rhs = ad_segment()) {
2520  static_assert(
2521  OperatorBase::dynamic,
2522  "Unlikely that you want to use this method for static operators?");
2523  static_assert(
2524  OperatorBase::ninput == 0 || OperatorBase::implicit_dependencies,
2525  "Operators with pointer inputs should always implement "
2526  "'implicit_dependencies'");
2527 
2528  IndexPair ptr((Index)inputs.size(), (Index)values.size());
2529  size_t n = pOp->output_size();
2530  ad_segment ans(values.size(), n);
2531  TMBAD_ASSERT((Index)(lhs.size() > 0) + (Index)(rhs.size() > 0) ==
2532  pOp->input_size());
2533  if (lhs.size() > 0) inputs.push_back(lhs.index());
2534  if (rhs.size() > 0) inputs.push_back(rhs.index());
2535  opstack.push_back(pOp);
2536  values.resize(values.size() + n);
2537  ForwardArgs<Scalar> args(inputs, values, this);
2538  args.ptr = ptr;
2539  pOp->forward(args);
2540 
2541  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
2542  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
2543  return ans;
2544  }
2547  template <class OperatorBase>
2548  std::vector<ad_plain> add_to_stack(OperatorPure *pOp,
2549  const std::vector<ad_plain> &x) {
2550  IndexPair ptr((Index)inputs.size(), (Index)values.size());
2551  size_t m = pOp->input_size();
2552  size_t n = pOp->output_size();
2553  ad_segment ans(values.size(), n);
2554  for (size_t i = 0; i < m; i++) inputs.push_back(x[i].index);
2555  opstack.push_back(pOp);
2556  values.resize(values.size() + n);
2557  ForwardArgs<Scalar> args(inputs, values, this);
2558  args.ptr = ptr;
2559  pOp->forward(args);
2560 
2561  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(values.size()));
2562  TMBAD_ASSERT(!TMBAD_INDEX_OVERFLOW(inputs.size()));
2563  std::vector<ad_plain> out(n);
2564  for (size_t i = 0; i < n; i++) out[i].index = ans.index() + i;
2565  return out;
2566  }
2567 
// Variable handle holding a single Index, together with the elementary
// operators (copy, value-copy, +, -, *, /, unary minus) defined on it.
// Member functions that are only declared here are implemented elsewhere.
2568  struct ad_plain {
// The only data member: the index identifying this variable.
2569  Index index;
// Sentinel constant (Index)-1; presumably marks "no index assigned" - confirm.
2570  static const Index NA = (Index)-1;
2571  bool initialized() const;
2572  bool on_some_tape() const;
2574  void addToTape() const;
2576  global *glob() const;
2580  void override_by(const ad_plain &x) const;
2581 
2586  ad_plain();
2587 
// Converting constructors from a plain Scalar and from an ad_aug.
2589  ad_plain(Scalar x);
2591  ad_plain(ad_aug x);
2592 
// Identity operator: eval returns its argument unchanged and reverse adds the
// output derivative dy(0) to the input derivative dx(0).
2594  struct CopyOp : Operator<1> {
2595  static const bool have_eval = true;
2596  template <class Type>
2597  Type eval(Type x0) {
2598  return x0;
2599  }
2600  Replay eval(Replay x0);
2601  template <class Type>
2602  void reverse(ReverseArgs<Type> &args) {
2603  args.dx(0) += args.dy(0);
2604  }
2605  const char *op_name();
2606  };
2614  ad_plain copy() const;
// Identity on values (eval returns x0) that sets have_dependencies and
// declares its own dependencies() member.
// NOTE(review): listing lines 2636-2642 are absent from this view, so the
// `template <class Type>` below appears directly in front of dependencies();
// the declaration it originally introduced is not visible here.
2625  struct ValOp : Operator<1> {
2626  static const bool have_dependencies = true;
2627  static const bool have_eval = true;
2629  template <class Type>
2630  Type eval(Type x0) {
2631  return x0;
2632  }
2633  Replay eval(Replay x0);
2635  template <class Type>
2643  void dependencies(Args<> &args, Dependencies &dep) const;
2644  const char *op_name();
2645  };
2649  ad_plain copy0() const;
2650 
// Addition x0 + x1. left_var / right_var select which of the two inputs
// receive the output derivative in the reverse pass.
2651  template <bool left_var, bool right_var>
2652  struct AddOp_ : BinaryOperator {
2653  static const bool is_linear = true;
2654  static const bool have_eval = true;
2655  template <class Type>
2656  Type eval(Type x0, Type x1) {
2657  return x0 + x1;
2658  }
2659  template <class Type>
2660  void reverse(ReverseArgs<Type> &args) {
2661  if (left_var) args.dx(0) += args.dy(0);
2662  if (right_var) args.dx(1) += args.dy(0);
2663  }
2664  const char *op_name() { return "AddOp"; }
// Returns the Fused<AddOp_, MulOp> operator when `other` is the MulOp
// operator obtained from get_glob(); otherwise NULL. `self` is not used.
2665  OperatorPure *other_fuse(OperatorPure *self, OperatorPure *other) {
2666  if (other == get_glob()->getOperator<MulOp>()) {
2667  return get_glob()->getOperator<Fused<AddOp_, MulOp> >();
2668  }
2669  return NULL;
2670  }
2671  };
2672  typedef AddOp_<true, true> AddOp;
2673  ad_plain operator+(const ad_plain &other) const;
2674 
// Subtraction x0 - x1: reverse adds dy to dx(0) and subtracts it from dx(1),
// each gated by left_var / right_var.
2675  template <bool left_var, bool right_var>
2676  struct SubOp_ : BinaryOperator {
2677  static const bool is_linear = true;
2678  static const bool have_eval = true;
2679  template <class Type>
2680  Type eval(Type x0, Type x1) {
2681  return x0 - x1;
2682  }
2683  template <class Type>
2684  void reverse(ReverseArgs<Type> &args) {
2685  if (left_var) args.dx(0) += args.dy(0);
2686  if (right_var) args.dx(1) -= args.dy(0);
2687  }
2688  const char *op_name() { return "SubOp"; }
2689  };
2690  typedef SubOp_<true, true> SubOp;
2691  ad_plain operator-(const ad_plain &other) const;
2692 
// Multiplication x0 * x1. Flagged linear only when at least one side is not a
// variable. Reverse applies the product rule: each input gets the other input
// times dy.
2693  template <bool left_var, bool right_var>
2694  struct MulOp_ : BinaryOperator {
2695  static const bool have_eval = true;
2696  static const bool is_linear = !left_var || !right_var;
2697  template <class Type>
2698  Type eval(Type x0, Type x1) {
2699  return x0 * x1;
2700  }
2701  template <class Type>
2702  void reverse(ReverseArgs<Type> &args) {
2703  if (left_var) args.dx(0) += args.x(1) * args.dy(0);
2704  if (right_var) args.dx(1) += args.x(0) * args.dy(0);
2705  }
2706  const char *op_name() { return "MulOp"; }
2707  };
2708  typedef MulOp_<true, true> MulOp;
2709  ad_plain operator*(const ad_plain &other) const;
2710  ad_plain operator*(const Scalar &other) const;
2711 
// Division x0 / x1. With y = x0 / x1: d/dx0 = 1 / x1 and d/dx1 = -y / x1, so
// the shared factor dy / x1 is computed once as tmp0.
2712  template <bool left_var, bool right_var>
2713  struct DivOp_ : BinaryOperator {
2714  static const bool have_eval = true;
2715  template <class Type>
2716  Type eval(Type x0, Type x1) {
2717  return x0 / x1;
2718  }
2719  template <class Type>
2720  void reverse(ReverseArgs<Type> &args) {
2721  Type tmp0 = args.dy(0) / args.x(1);
2722  if (left_var) args.dx(0) += tmp0;
2723  if (right_var) args.dx(1) -= args.y(0) * tmp0;
2724  }
2725  const char *op_name() { return "DivOp"; }
2726  };
2727  typedef DivOp_<true, true> DivOp;
2728  ad_plain operator/(const ad_plain &other) const;
2729 
// Unary minus: eval returns -x0 and reverse subtracts dy from dx(0).
2730  struct NegOp : UnaryOperator {
2731  static const bool is_linear = true;
2732  static const bool have_eval = true;
2733  template <class Type>
2734  Type eval(Type x0) {
2735  return -x0;
2736  }
2737  template <class Type>
2738  void reverse(ReverseArgs<Type> &args) {
2739  args.dx(0) -= args.dy(0);
2740  }
2741  const char *op_name();
2742  };
2743  ad_plain operator-() const;
2744 
// Compound assignment operators (declared only).
2745  ad_plain &operator+=(const ad_plain &other);
2746  ad_plain &operator-=(const ad_plain &other);
2747  ad_plain &operator*=(const ad_plain &other);
2748  ad_plain &operator/=(const ad_plain &other);
2749 
2750  void Dependent();
2751 
2752  void Independent();
// Value / derivative accessors (declared only).
2753  Scalar &Value();
2754  Scalar Value() const;
2755  Scalar Value(global *glob) const;
2756  Scalar &Deriv();
2757  };
// Boolean state flag plus the ad_start()/ad_stop() pair and a vector overload
// of Independent(). Only declarations are visible here.
// NOTE(review): presumably in_use tracks whether this tape is currently
// started - confirm against the definitions of ad_start/ad_stop.
2765  bool in_use;
2769  void ad_start();
2771  void ad_stop();
2772  void Independent(std::vector<ad_plain> &x);
// A run of variables described by a starting ad_plain `x` and two size_t
// fields `n` and `c`. All member functions are declared only.
// NOTE(review): from the (x, r, c) constructor and rows()/cols() accessors,
// n and c presumably hold row and column counts - confirm in the definitions.
2780  struct ad_segment {
2781  ad_plain x;
2782  size_t n;
2783  size_t c;
// Constructors: default; from a start variable and length; from a single
// ad_aug or Scalar; from a raw start Index and length; from a start variable
// with (r, c) extents; and from an array of n Replay values.
2785  ad_segment();
2787  ad_segment(ad_plain x, size_t n);
2789  ad_segment(ad_aug x);
2791  ad_segment(Scalar x);
2793  ad_segment(Index idx, size_t n);
2795  ad_segment(ad_plain x, size_t r, size_t c);
2798  ad_segment(Replay *x, size_t n, bool zero_check = false);
2799  bool identicalZero();
// Predicates over an array of n Replay values.
2800  bool all_on_active_tape(Replay *x, size_t n);
2801  bool is_contiguous(Replay *x, size_t n);
2802  bool all_zero(Replay *x, size_t n);
2803  bool all_constant(Replay *x, size_t n);
2804  size_t size() const;
2805  size_t rows() const;
2806  size_t cols() const;
2807 
// Element access and position queries.
2808  ad_plain operator[](size_t i) const;
2809  ad_plain offset() const;
2810  Index index() const;
2811  };
// Augmented variable: an ad_plain `taped_value` together with a `data` member
// that holds a Scalar `value` and a `global *glob`. Whether those two share
// storage depends on how TMBAD_UNION_OR_STRUCT expands (defined elsewhere).
// All member functions are declared only.
2831  struct ad_aug {
// mutable: can be changed through const member functions.
2834  mutable ad_plain taped_value;
2838  TMBAD_UNION_OR_STRUCT {
2839  Scalar value;
2840  mutable global *glob;
2841  }
2842  data;
// State queries.
2844  bool on_some_tape() const;
2846  bool on_active_tape() const;
2848  bool ontape() const;
2852  bool constant() const;
2853  Index index() const;
2859  global *glob() const;
2861  Scalar Value() const;
// Constructors: default, from a Scalar, and from an ad_plain.
2865  ad_aug();
2869  ad_aug(Scalar x);
2871  ad_aug(ad_plain x);
2876  void addToTape() const;
2880  void override_by(const ad_plain &x) const;
2882  bool in_context_stack(global *glob) const;
2885  ad_aug copy() const;
2887  ad_aug copy0() const;
// Comparisons against special values / other variables.
2890  bool identicalZero() const;
2893  bool identicalOne() const;
2897  bool bothConstant(const ad_aug &other) const;
2901  bool identical(const ad_aug &other) const;
// Arithmetic operators.
2906  ad_aug operator+(const ad_aug &other) const;
2912  ad_aug operator-(const ad_aug &other) const;
2914  ad_aug operator-() const;
2921  ad_aug operator*(const ad_aug &other) const;
2926  ad_aug operator/(const ad_aug &other) const;
2929  ad_aug &operator+=(const ad_aug &other);
2932  ad_aug &operator-=(const ad_aug &other);
2935  ad_aug &operator*=(const ad_aug &other);
2938  ad_aug &operator/=(const ad_aug &other);
2940  void Dependent();
2942  void Independent();
2943  Scalar &Value();
2944  Scalar &Deriv();
2945  };
// Vector overload of Independent() for ad_aug elements (declared only).
2946  void Independent(std::vector<ad_aug> &x);
2947 };
2948 
// Stream a std::pair as "(first, second)". Returns the stream so calls can be
// chained. Requires operator<< to exist for both S and T.
2949 template <class S, class T>
2950 std::ostream &operator<<(std::ostream &os, const std::pair<S, T> &x) {
2951  os << "(" << x.first << ", " << x.second << ")";
2952  return os;
2953 }
2954 
// Stream output for the two variable types (defined elsewhere).
2955 std::ostream &operator<<(std::ostream &os, const global::ad_plain &x);
2956 std::ostream &operator<<(std::ostream &os, const global::ad_aug &x);
2957 
// Wrapper deriving from T that adds comparison operators based on Value()
// and re-wraps T's arithmetic results as adaptive.
// Constructors come from the INHERIT_CTOR macro (defined elsewhere).
2968 template <class T>
2969 struct adaptive : T {
2970  INHERIT_CTOR(adaptive, T)
// Comparisons: each one compares this->Value() with other.Value() and yields
// a plain bool.
2971  bool operator==(const T &other) const {
2972  return this->Value() == other.Value();
2973  }
2974  bool operator!=(const T &other) const {
2975  return this->Value() != other.Value();
2976  }
2977  bool operator>=(const T &other) const {
2978  return this->Value() >= other.Value();
2979  }
2980  bool operator<=(const T &other) const {
2981  return this->Value() <= other.Value();
2982  }
2983  bool operator<(const T &other) const { return this->Value() < other.Value(); }
2984  bool operator>(const T &other) const { return this->Value() > other.Value(); }
2985 
// Arithmetic: convert *this to T, apply T's operator, wrap the result again.
2986  adaptive operator+(const T &other) const {
2987  return adaptive(T(*this) + other);
2988  }
2989  adaptive operator-(const T &other) const {
2990  return adaptive(T(*this) - other);
2991  }
2992  adaptive operator*(const T &other) const {
2993  return adaptive(T(*this) * other);
2994  }
2995  adaptive operator/(const T &other) const {
2996  return adaptive(T(*this) / other);
2997  }
2998 
2999  adaptive operator-() const { return adaptive(-(T(*this))); }
3000 };
3001 
// Short names for the types nested in `global`; ad_adapt is adaptive<ad_aug>.
3002 typedef global::ad_plain ad_plain;
3003 typedef global::ad_aug ad_aug;
3004 typedef global::Replay Replay;
3005 typedef adaptive<ad_aug> ad_adapt;
// ad_plain subtype constructible from an Index or from an ad_plain.
// Constructors are defined elsewhere.
// NOTE(review): presumably the Index constructor sets the `index` member
// directly rather than recording a new value - confirm in the definition.
3014 struct ad_plain_index : ad_plain {
3015  ad_plain_index(const Index &i);
3016  ad_plain_index(const ad_plain &x);
3017 };
// ad_aug subtype constructible from an Index, an ad_aug or an ad_plain.
// Constructors are defined elsewhere.
3018 struct ad_aug_index : ad_aug {
3019  ad_aug_index(const Index &i);
3020  ad_aug_index(const ad_aug &x);
3021  ad_aug_index(const ad_plain &x);
3022 };
3023 
// Call Independent() on every element of x, in index order.
3024 template <class T>
3025 void Independent(std::vector<T> &x) {
3026  for (size_t i = 0; i < x.size(); i++) x[i].Independent();
3027 }
// Call Dependent() on every element of x, in index order.
3028 template <class T>
3029 void Dependent(std::vector<T> &x) {
3030  for (size_t i = 0; i < x.size(); i++) x[i].Dependent();
3031 }
// Free-function form of x.Value(). x is taken by value, so the call works on
// a copy. The Scalar overload below is declared only.
3032 template <class T>
3033 Scalar Value(T x) {
3034  return x.Value();
3035 }
3036 Scalar Value(Scalar x);
3037 
// Return true when every element of x reports on_some_tape() and the indices
// ad_plain(x[i]).index increase by exactly one from each element to the next.
// An empty x yields true. Stops at the first element that fails.
3044 template <class V>
3045 bool isContiguous(V &x) {
3046  bool ok = true;
// Left uninitialized on purpose or by omission: it is only read when i > 0,
// i.e. after the assignment at the end of the previous iteration.
3047  Index j_previous;
3048  for (size_t i = 0; i < (size_t)x.size(); i++) {
3049  if (!x[i].on_some_tape()) {
3050  ok = false;
3051  break;
3052  }
3053  Index j = ad_plain(x[i]).index;
3054  if (i > 0) {
3055  if (j != j_previous + 1) {
3056  ok = false;
3057  break;
3058  }
3059  }
3060  j_previous = j;
3061  }
3062  return ok;
3063 }
// Build a container of the same size as x whose i-th element is x[i].copy().
// x itself is not modified.
3070 template <class V>
3071 V getContiguous(const V &x) {
3072  V y(x.size());
3073  for (size_t i = 0; i < (size_t)x.size(); i++) y[i] = x[i].copy();
3074  return y;
3075 }
// Replace x by getContiguous(x) unless isContiguous(x) already holds.
3082 template <class V>
3083 void forceContiguous(V &x) {
3084  if (!isContiguous(x)) x = getContiguous(x);
3085 }
// Operators with a double on the left-hand side (all defined elsewhere):
// arithmetic against ad_aug returns ad_aug.
3086 ad_aug operator+(const double &x, const ad_aug &y);
3087 ad_aug operator-(const double &x, const ad_aug &y);
3088 ad_aug operator*(const double &x, const ad_aug &y);
3089 ad_aug operator/(const double &x, const ad_aug &y);
3090 
// Comparisons against ad_adapt return bool.
3091 bool operator<(const double &x, const ad_adapt &y);
3092 bool operator<=(const double &x, const ad_adapt &y);
3093 bool operator>(const double &x, const ad_adapt &y);
3094 bool operator>=(const double &x, const ad_adapt &y);
3095 bool operator==(const double &x, const ad_adapt &y);
3096 bool operator!=(const double &x, const ad_adapt &y);
// Bring the rounding functions into this scope so that the unqualified calls
// in the operator templates below can also resolve for built-in types.
// round/trunc come from the global namespace, ceil/floor from std.
3097 using ::round;
3098 using ::trunc;
3099 using std::ceil;
3100 using std::floor;
// floor: Writer overload, operator struct, and ad_plain / ad_aug overloads.
3101 Writer floor(const Writer &x);
// Unary operator evaluating floor(x). reverse() has an empty body, so no
// derivative is propagated to the input.
3102 struct FloorOp : global::UnaryOperator {
3103  static const bool have_eval = true;
3104  template <class Type>
3105  Type eval(Type x) {
3106  return floor(x);
3107  }
3108  template <class Type>
3109  void reverse(ReverseArgs<Type> &args) {}
3110  const char *op_name();
3111 };
3112 ad_plain floor(const ad_plain &x);
3113 ad_aug floor(const ad_aug &x);
// ceil: Writer overload, operator struct, and ad_plain / ad_aug overloads.
3114 Writer ceil(const Writer &x);
// Unary operator evaluating ceil(x). reverse() has an empty body, so no
// derivative is propagated to the input.
3115 struct CeilOp : global::UnaryOperator {
3116  static const bool have_eval = true;
3117  template <class Type>
3118  Type eval(Type x) {
3119  return ceil(x);
3120  }
3121  template <class Type>
3122  void reverse(ReverseArgs<Type> &args) {}
3123  const char *op_name();
3124 };
3125 ad_plain ceil(const ad_plain &x);
3126 ad_aug ceil(const ad_aug &x);
// trunc: Writer overload, operator struct, and ad_plain / ad_aug overloads.
3127 Writer trunc(const Writer &x);
// Unary operator evaluating trunc(x). reverse() has an empty body, so no
// derivative is propagated to the input.
3128 struct TruncOp : global::UnaryOperator {
3129  static const bool have_eval = true;
3130  template <class Type>
3131  Type eval(Type x) {
3132  return trunc(x);
3133  }
3134  template <class Type>
3135  void reverse(ReverseArgs<Type> &args) {}
3136  const char *op_name();
3137 };
3138 ad_plain trunc(const ad_plain &x);
3139 ad_aug trunc(const ad_aug &x);
// round: Writer overload, operator struct, and ad_plain / ad_aug overloads.
3140 Writer round(const Writer &x);
// Unary operator evaluating round(x). reverse() has an empty body, so no
// derivative is propagated to the input.
3141 struct RoundOp : global::UnaryOperator {
3142  static const bool have_eval = true;
3143  template <class Type>
3144  Type eval(Type x) {
3145  return round(x);
3146  }
3147  template <class Type>
3148  void reverse(ReverseArgs<Type> &args) {}
3149  const char *op_name();
3150 };
3151 ad_plain round(const ad_plain &x);
3152 ad_aug round(const ad_aug &x);
3153 
// sign: double and Writer overloads, operator struct, and ad_plain / ad_aug
// overloads. The double version is defined elsewhere; its exact value at 0 is
// not visible here.
3154 double sign(const double &x);
3155 Writer sign(const Writer &x);
// Unary operator evaluating sign(x). reverse() has an empty body, so no
// derivative is propagated to the input.
3156 struct SignOp : global::UnaryOperator {
3157  static const bool have_eval = true;
3158  template <class Type>
3159  Type eval(Type x) {
3160  return sign(x);
3161  }
3162  template <class Type>
3163  void reverse(ReverseArgs<Type> &args) {}
3164  const char *op_name();
3165 };
3166 ad_plain sign(const ad_plain &x);
3167 ad_aug sign(const ad_aug &x);
3168 
// ge0 / lt0 on doubles (defined elsewhere).
// NOTE(review): by name these presumably return 1 or 0 for x >= 0 and x < 0
// respectively - confirm in the definitions.
3169 double ge0(const double &x);
3170 double lt0(const double &x);
3171 Writer ge0(const Writer &x);
// Unary operator evaluating ge0(x). reverse() has an empty body, so no
// derivative is propagated to the input.
3172 struct Ge0Op : global::UnaryOperator {
3173  static const bool have_eval = true;
3174  template <class Type>
3175  Type eval(Type x) {
3176  return ge0(x);
3177  }
3178  template <class Type>
3179  void reverse(ReverseArgs<Type> &args) {}
3180  const char *op_name();
3181 };
3182 ad_plain ge0(const ad_plain &x);
3183 ad_aug ge0(const ad_aug &x);
// lt0: Writer overload, operator struct, and ad_plain / ad_aug overloads.
3184 Writer lt0(const Writer &x);
// Unary operator evaluating lt0(x). reverse() has an empty body, so no
// derivative is propagated to the input.
3185 struct Lt0Op : global::UnaryOperator {
3186  static const bool have_eval = true;
3187  template <class Type>
3188  Type eval(Type x) {
3189  return lt0(x);
3190  }
3191  template <class Type>
3192  void reverse(ReverseArgs<Type> &args) {}
3193  const char *op_name();
3194 };
3195 ad_plain lt0(const ad_plain &x);
3196 ad_aug lt0(const ad_aug &x);
// Bring the elementary math functions into this scope so that the unqualified
// calls in the operator templates below can also resolve for built-in types.
// expm1, fabs and log1p are taken from the global namespace, the rest from std.
3197 using ::expm1;
3198 using ::fabs;
3199 using ::log1p;
3200 using std::acos;
3201 using std::acosh;
3202 using std::asin;
3203 using std::asinh;
3204 using std::atan;
3205 using std::atanh;
3206 using std::cos;
3207 using std::cosh;
3208 using std::exp;
3209 using std::log;
3210 using std::sin;
3211 using std::sinh;
3212 using std::sqrt;
3213 using std::tan;
3214 using std::tanh;
3215 
// fabs: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3216 Writer fabs(const Writer &x);
// Unary operator evaluating fabs(x). Reverse: dx += dy * sign(x).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3217 struct AbsOp : global::UnaryOperator {
3218  static const bool have_eval = true;
3219  template <class Type>
3220  Type eval(Type x) {
3221  return fabs(x);
3222  }
3223  template <class Type>
3224  void reverse(ReverseArgs<Type> &args) {
3225  args.dx(0) += args.dy(0) * sign(args.x(0));
3226  }
3227  void reverse(ReverseArgs<Scalar> &args);
3228  const char *op_name();
3229 };
3230 ad_plain fabs(const ad_plain &x);
3231 ad_aug fabs(const ad_aug &x);
3232 ad_adapt fabs(const ad_adapt &x);
// cos is declared ahead of SinOp because SinOp::reverse calls it; the same
// declarations are repeated next to CosOp.
3233 Writer cos(const Writer &x);
3234 ad_aug cos(const ad_aug &x);
3235 Writer sin(const Writer &x);
// Unary operator evaluating sin(x). Reverse: dx += dy * cos(x).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3236 struct SinOp : global::UnaryOperator {
3237  static const bool have_eval = true;
3238  template <class Type>
3239  Type eval(Type x) {
3240  return sin(x);
3241  }
3242  template <class Type>
3243  void reverse(ReverseArgs<Type> &args) {
3244  args.dx(0) += args.dy(0) * cos(args.x(0));
3245  }
3246  void reverse(ReverseArgs<Scalar> &args);
3247  const char *op_name();
3248 };
3249 ad_plain sin(const ad_plain &x);
3250 ad_aug sin(const ad_aug &x);
3251 ad_adapt sin(const ad_adapt &x);
// cos: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3252 Writer cos(const Writer &x);
// Unary operator evaluating cos(x). Reverse: dx += dy * (-sin(x)).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3253 struct CosOp : global::UnaryOperator {
3254  static const bool have_eval = true;
3255  template <class Type>
3256  Type eval(Type x) {
3257  return cos(x);
3258  }
3259  template <class Type>
3260  void reverse(ReverseArgs<Type> &args) {
3261  args.dx(0) += args.dy(0) * -sin(args.x(0));
3262  }
3263  void reverse(ReverseArgs<Scalar> &args);
3264  const char *op_name();
3265 };
3266 ad_plain cos(const ad_plain &x);
3267 ad_aug cos(const ad_aug &x);
3268 ad_adapt cos(const ad_adapt &x);
// exp: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3269 Writer exp(const Writer &x);
// Unary operator evaluating exp(x). Reverse reuses the stored output y:
// dx += dy * y, since d/dx exp(x) = exp(x).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3270 struct ExpOp : global::UnaryOperator {
3271  static const bool have_eval = true;
3272  template <class Type>
3273  Type eval(Type x) {
3274  return exp(x);
3275  }
3276  template <class Type>
3277  void reverse(ReverseArgs<Type> &args) {
3278  args.dx(0) += args.dy(0) * args.y(0);
3279  }
3280  void reverse(ReverseArgs<Scalar> &args);
3281  const char *op_name();
3282 };
3283 ad_plain exp(const ad_plain &x);
3284 ad_aug exp(const ad_aug &x);
3285 ad_adapt exp(const ad_adapt &x);
// log: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3286 Writer log(const Writer &x);
// Unary operator evaluating log(x). Reverse: dx += dy * 1 / x.
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3287 struct LogOp : global::UnaryOperator {
3288  static const bool have_eval = true;
3289  template <class Type>
3290  Type eval(Type x) {
3291  return log(x);
3292  }
3293  template <class Type>
3294  void reverse(ReverseArgs<Type> &args) {
3295  args.dx(0) += args.dy(0) * Type(1.) / args.x(0);
3296  }
3297  void reverse(ReverseArgs<Scalar> &args);
3298  const char *op_name();
3299 };
3300 ad_plain log(const ad_plain &x);
3301 ad_aug log(const ad_aug &x);
3302 ad_adapt log(const ad_adapt &x);
// sqrt: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3303 Writer sqrt(const Writer &x);
// Unary operator evaluating sqrt(x). Reverse reuses the stored output y:
// dx += dy * 0.5 / y, since d/dx sqrt(x) = 1 / (2 sqrt(x)).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3304 struct SqrtOp : global::UnaryOperator {
3305  static const bool have_eval = true;
3306  template <class Type>
3307  Type eval(Type x) {
3308  return sqrt(x);
3309  }
3310  template <class Type>
3311  void reverse(ReverseArgs<Type> &args) {
3312  args.dx(0) += args.dy(0) * Type(0.5) / args.y(0);
3313  }
3314  void reverse(ReverseArgs<Scalar> &args);
3315  const char *op_name();
3316 };
3317 ad_plain sqrt(const ad_plain &x);
3318 ad_aug sqrt(const ad_aug &x);
3319 ad_adapt sqrt(const ad_adapt &x);
// tan: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3320 Writer tan(const Writer &x);
// Unary operator evaluating tan(x). Reverse: dx += dy / cos(x)^2.
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3321 struct TanOp : global::UnaryOperator {
3322  static const bool have_eval = true;
3323  template <class Type>
3324  Type eval(Type x) {
3325  return tan(x);
3326  }
3327  template <class Type>
3328  void reverse(ReverseArgs<Type> &args) {
3329  args.dx(0) += args.dy(0) * Type(1.) / (cos(args.x(0)) * cos(args.x(0)));
3330  }
3331  void reverse(ReverseArgs<Scalar> &args);
3332  const char *op_name();
3333 };
3334 ad_plain tan(const ad_plain &x);
3335 ad_aug tan(const ad_aug &x);
3336 ad_adapt tan(const ad_adapt &x);
// cosh is declared ahead of SinhOp because SinhOp::reverse calls it; the same
// declarations are repeated next to CoshOp.
3337 Writer cosh(const Writer &x);
3338 ad_aug cosh(const ad_aug &x);
3339 Writer sinh(const Writer &x);
// Unary operator evaluating sinh(x). Reverse: dx += dy * cosh(x).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3340 struct SinhOp : global::UnaryOperator {
3341  static const bool have_eval = true;
3342  template <class Type>
3343  Type eval(Type x) {
3344  return sinh(x);
3345  }
3346  template <class Type>
3347  void reverse(ReverseArgs<Type> &args) {
3348  args.dx(0) += args.dy(0) * cosh(args.x(0));
3349  }
3350  void reverse(ReverseArgs<Scalar> &args);
3351  const char *op_name();
3352 };
3353 ad_plain sinh(const ad_plain &x);
3354 ad_aug sinh(const ad_aug &x);
3355 ad_adapt sinh(const ad_adapt &x);
// cosh: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3356 Writer cosh(const Writer &x);
// Unary operator evaluating cosh(x). Reverse: dx += dy * sinh(x).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3357 struct CoshOp : global::UnaryOperator {
3358  static const bool have_eval = true;
3359  template <class Type>
3360  Type eval(Type x) {
3361  return cosh(x);
3362  }
3363  template <class Type>
3364  void reverse(ReverseArgs<Type> &args) {
3365  args.dx(0) += args.dy(0) * sinh(args.x(0));
3366  }
3367  void reverse(ReverseArgs<Scalar> &args);
3368  const char *op_name();
3369 };
3370 ad_plain cosh(const ad_plain &x);
3371 ad_aug cosh(const ad_aug &x);
3372 ad_adapt cosh(const ad_adapt &x);
// tanh: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3373 Writer tanh(const Writer &x);
// Unary operator evaluating tanh(x). Reverse: dx += dy / cosh(x)^2.
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3374 struct TanhOp : global::UnaryOperator {
3375  static const bool have_eval = true;
3376  template <class Type>
3377  Type eval(Type x) {
3378  return tanh(x);
3379  }
3380  template <class Type>
3381  void reverse(ReverseArgs<Type> &args) {
3382  args.dx(0) += args.dy(0) * Type(1.) / (cosh(args.x(0)) * cosh(args.x(0)));
3383  }
3384  void reverse(ReverseArgs<Scalar> &args);
3385  const char *op_name();
3386 };
3387 ad_plain tanh(const ad_plain &x);
3388 ad_aug tanh(const ad_aug &x);
3389 ad_adapt tanh(const ad_adapt &x);
// expm1: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3390 Writer expm1(const Writer &x);
// Unary operator evaluating expm1(x) = exp(x) - 1.
// Reverse reuses the stored output y: d/dx expm1(x) = exp(x) = y + 1, so the
// increment is dy * (y + 1).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
// NOTE(review): check that the Scalar overload uses the same parenthesized
// expression.
3391 struct Expm1 : global::UnaryOperator {
3392  static const bool have_eval = true;
3393  template <class Type>
3394  Type eval(Type x) {
3395  return expm1(x);
3396  }
3397  template <class Type>
3398  void reverse(ReverseArgs<Type> &args) {
// The parentheses matter: without them the expression is (dy * y) + 1, which
// adds a constant 1 to dx even when dy is zero.
3399  args.dx(0) += args.dy(0) * (args.y(0) + Type(1.));
3400  }
3401  void reverse(ReverseArgs<Scalar> &args);
3402  const char *op_name();
3403 };
3404 ad_plain expm1(const ad_plain &x);
3405 ad_aug expm1(const ad_aug &x);
3406 ad_adapt expm1(const ad_adapt &x);
// log1p: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3407 Writer log1p(const Writer &x);
// Unary operator evaluating log1p(x). Reverse: dx += dy / (x + 1).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3408 struct Log1p : global::UnaryOperator {
3409  static const bool have_eval = true;
3410  template <class Type>
3411  Type eval(Type x) {
3412  return log1p(x);
3413  }
3414  template <class Type>
3415  void reverse(ReverseArgs<Type> &args) {
3416  args.dx(0) += args.dy(0) * Type(1.) / (args.x(0) + Type(1.));
3417  }
3418  void reverse(ReverseArgs<Scalar> &args);
3419  const char *op_name();
3420 };
3421 ad_plain log1p(const ad_plain &x);
3422 ad_aug log1p(const ad_aug &x);
3423 ad_adapt log1p(const ad_adapt &x);
// asin: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3424 Writer asin(const Writer &x);
// Unary operator evaluating asin(x). Reverse: dx += dy / sqrt(1 - x^2).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3425 struct AsinOp : global::UnaryOperator {
3426  static const bool have_eval = true;
3427  template <class Type>
3428  Type eval(Type x) {
3429  return asin(x);
3430  }
3431  template <class Type>
3432  void reverse(ReverseArgs<Type> &args) {
3433  args.dx(0) +=
3434  args.dy(0) * Type(1.) / sqrt(Type(1.) - args.x(0) * args.x(0));
3435  }
3436  void reverse(ReverseArgs<Scalar> &args);
3437  const char *op_name();
3438 };
3439 ad_plain asin(const ad_plain &x);
3440 ad_aug asin(const ad_aug &x);
3441 ad_adapt asin(const ad_adapt &x);
// acos: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3442 Writer acos(const Writer &x);
// Unary operator evaluating acos(x). Reverse: dx += dy * (-1) / sqrt(1 - x^2).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3443 struct AcosOp : global::UnaryOperator {
3444  static const bool have_eval = true;
3445  template <class Type>
3446  Type eval(Type x) {
3447  return acos(x);
3448  }
3449  template <class Type>
3450  void reverse(ReverseArgs<Type> &args) {
3451  args.dx(0) +=
3452  args.dy(0) * Type(-1.) / sqrt(Type(1.) - args.x(0) * args.x(0));
3453  }
3454  void reverse(ReverseArgs<Scalar> &args);
3455  const char *op_name();
3456 };
3457 ad_plain acos(const ad_plain &x);
3458 ad_aug acos(const ad_aug &x);
3459 ad_adapt acos(const ad_adapt &x);
// atan: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3460 Writer atan(const Writer &x);
// Unary operator evaluating atan(x). Reverse: dx += dy / (1 + x^2).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3461 struct AtanOp : global::UnaryOperator {
3462  static const bool have_eval = true;
3463  template <class Type>
3464  Type eval(Type x) {
3465  return atan(x);
3466  }
3467  template <class Type>
3468  void reverse(ReverseArgs<Type> &args) {
3469  args.dx(0) += args.dy(0) * Type(1.) / (Type(1.) + args.x(0) * args.x(0));
3470  }
3471  void reverse(ReverseArgs<Scalar> &args);
3472  const char *op_name();
3473 };
3474 ad_plain atan(const ad_plain &x);
3475 ad_aug atan(const ad_aug &x);
3476 ad_adapt atan(const ad_adapt &x);
// asinh: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3477 Writer asinh(const Writer &x);
// Unary operator evaluating asinh(x). Reverse: dx += dy / sqrt(x^2 + 1).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3478 struct AsinhOp : global::UnaryOperator {
3479  static const bool have_eval = true;
3480  template <class Type>
3481  Type eval(Type x) {
3482  return asinh(x);
3483  }
3484  template <class Type>
3485  void reverse(ReverseArgs<Type> &args) {
3486  args.dx(0) +=
3487  args.dy(0) * Type(1.) / sqrt(args.x(0) * args.x(0) + Type(1.));
3488  }
3489  void reverse(ReverseArgs<Scalar> &args);
3490  const char *op_name();
3491 };
3492 ad_plain asinh(const ad_plain &x);
3493 ad_aug asinh(const ad_aug &x);
3494 ad_adapt asinh(const ad_adapt &x);
// acosh: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3495 Writer acosh(const Writer &x);
// Unary operator evaluating acosh(x). Reverse: dx += dy / sqrt(x^2 - 1).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3496 struct AcoshOp : global::UnaryOperator {
3497  static const bool have_eval = true;
3498  template <class Type>
3499  Type eval(Type x) {
3500  return acosh(x);
3501  }
3502  template <class Type>
3503  void reverse(ReverseArgs<Type> &args) {
3504  args.dx(0) +=
3505  args.dy(0) * Type(1.) / sqrt(args.x(0) * args.x(0) - Type(1.));
3506  }
3507  void reverse(ReverseArgs<Scalar> &args);
3508  const char *op_name();
3509 };
3510 ad_plain acosh(const ad_plain &x);
3511 ad_aug acosh(const ad_aug &x);
3512 ad_adapt acosh(const ad_adapt &x);
// atanh: Writer overload, operator struct, and ad_plain / ad_aug / ad_adapt
// overloads.
3513 Writer atanh(const Writer &x);
// Unary operator evaluating atanh(x). Reverse: dx += dy / (1 - x^2).
// A separate non-template overload for ReverseArgs<Scalar> is declared here
// and defined elsewhere.
3514 struct AtanhOp : global::UnaryOperator {
3515  static const bool have_eval = true;
3516  template <class Type>
3517  Type eval(Type x) {
3518  return atanh(x);
3519  }
3520  template <class Type>
3521  void reverse(ReverseArgs<Type> &args) {
3522  args.dx(0) += args.dy(0) * Type(1.) / (Type(1) - args.x(0) * args.x(0));
3523  }
3524  void reverse(ReverseArgs<Scalar> &args);
3525  const char *op_name();
3526 };
3527 ad_plain atanh(const ad_plain &x);
3528 ad_aug atanh(const ad_aug &x);
3529 ad_adapt atanh(const ad_adapt &x);
3530 
// Generic abs(): forwards to whichever fabs overload matches T.
3531 template <class T>
3532 T abs(const T &x) {
3533  return fabs(x);
3534 }
// pow: std::pow is made visible, followed by the Writer overload, the operator
// struct, and the ad_plain / ad_aug / ad_adapt overloads.
3535 using std::pow;
3536 Writer pow(const Writer &x1, const Writer &x2);
// Binary operator evaluating y = pow(x1, x2).
// Reverse: d/dx1 = x2 * pow(x1, x2 - 1); d/dx2 = y * log(x1).
// Both inputs are always updated, and log(x1) is evaluated unconditionally.
3537 struct PowOp : global::BinaryOperator {
3538  static const bool have_eval = true;
3539  template <class Type>
3540  Type eval(Type x1, Type x2) {
3541  return pow(x1, x2);
3542  }
3543  template <class Type>
3544  void reverse(ReverseArgs<Type> &args) {
3545  args.dx(0) += args.dy(0) * args.x(1) * pow(args.x(0), args.x(1) - Type(1.));
3546  args.dx(1) += args.dy(0) * args.y(0) * log(args.x(0));
3547  }
3548  const char *op_name();
3549 };
3550 ad_plain pow(const ad_plain &x1, const ad_plain &x2);
3551 ad_aug pow(const ad_aug &x1, const ad_aug &x2);
3552 ad_adapt pow(const ad_adapt &x1, const ad_adapt &x2);
// atan2: std::atan2 is made visible, followed by the Writer overload, the
// operator struct, and the ad_plain / ad_aug / ad_adapt overloads.
3553 using std::atan2;
3554 Writer atan2(const Writer &x1, const Writer &x2);
// Binary operator evaluating atan2(x1, x2).
// Reverse, with r2 = x1^2 + x2^2: d/dx1 = x2 / r2 and d/dx2 = -x1 / r2.
3555 struct Atan2 : global::BinaryOperator {
3556  static const bool have_eval = true;
3557  template <class Type>
3558  Type eval(Type x1, Type x2) {
3559  return atan2(x1, x2);
3560  }
3561  template <class Type>
3562  void reverse(ReverseArgs<Type> &args) {
3563  args.dx(0) += args.dy(0) * args.x(1) /
3564  (args.x(0) * args.x(0) + args.x(1) * args.x(1));
3565  args.dx(1) += args.dy(0) * -args.x(0) /
3566  (args.x(0) * args.x(0) + args.x(1) * args.x(1));
3567  }
3568  const char *op_name();
3569 };
3570 ad_plain atan2(const ad_plain &x1, const ad_plain &x2);
3571 ad_aug atan2(const ad_aug &x1, const ad_aug &x2);
3572 ad_adapt atan2(const ad_adapt &x1, const ad_adapt &x2);
// max: std::max is made visible, followed by the Writer overload, the operator
// struct, and the ad_plain / ad_aug / ad_adapt overloads.
3573 using std::max;
3574 Writer max(const Writer &x1, const Writer &x2);
// Binary operator evaluating max(x1, x2).
// Reverse weights dy by ge0(x1 - x2) for the first input and by
// lt0(x1 - x2) for the second.
3575 struct MaxOp : global::BinaryOperator {
3576  static const bool have_eval = true;
3577  template <class Type>
3578  Type eval(Type x1, Type x2) {
3579  return max(x1, x2);
3580  }
3581  template <class Type>
3582  void reverse(ReverseArgs<Type> &args) {
3583  args.dx(0) += args.dy(0) * ge0(args.x(0) - args.x(1));
3584  args.dx(1) += args.dy(0) * lt0(args.x(0) - args.x(1));
3585  }
3586  const char *op_name();
3587 };
3588 ad_plain max(const ad_plain &x1, const ad_plain &x2);
3589 ad_aug max(const ad_aug &x1, const ad_aug &x2);
3590 ad_adapt max(const ad_adapt &x1, const ad_adapt &x2);
3591 
// min: std::min is made visible, followed by the Writer overload, the operator
// struct, and the ad_plain / ad_aug / ad_adapt overloads.
3592 using std::min;
3593 Writer min(const Writer &x1, const Writer &x2);
// Binary operator evaluating min(x1, x2).
// Reverse mirrors MaxOp with the difference reversed: dy is weighted by
// ge0(x2 - x1) for the first input and by lt0(x2 - x1) for the second.
3594 struct MinOp : global::BinaryOperator {
3595  static const bool have_eval = true;
3596  template <class Type>
3597  Type eval(Type x1, Type x2) {
3598  return min(x1, x2);
3599  }
3600  template <class Type>
3601  void reverse(ReverseArgs<Type> &args) {
3602  args.dx(0) += args.dy(0) * ge0(args.x(1) - args.x(0));
3603  args.dx(1) += args.dy(0) * lt0(args.x(1) - args.x(0));
3604  }
3605  const char *op_name();
3606 };
3607 ad_plain min(const ad_plain &x1, const ad_plain &x2);
3608 ad_aug min(const ad_aug &x1, const ad_aug &x2);
3609 ad_adapt min(const ad_adapt &x1, const ad_adapt &x2);
// CondExpEq family: Replay, Scalar, ad_plain and ad_aug overloads plus the
// operator struct. All four-argument functions are defined elsewhere.
// NOTE(review): by name, presumably yields x2 when x0 == x1 and x3 otherwise -
// confirm in the definitions.
3610 Replay CondExpEq(const Replay &x0, const Replay &x1, const Replay &x2,
3611  const Replay &x3);
// Operator with 4 inputs and 1 output. forward/reverse are declared for
// Scalar, Replay and Writer arguments only; the generic templates hit
// TMBAD_ASSERT(false), so any other argument type is rejected.
3612 struct CondExpEqOp : global::Operator<4, 1> {
3613  void forward(ForwardArgs<Scalar> &args);
3614  void reverse(ReverseArgs<Scalar> &args);
3615  void forward(ForwardArgs<Replay> &args);
3616  void reverse(ReverseArgs<Replay> &args);
3617  void forward(ForwardArgs<Writer> &args);
3618  void reverse(ReverseArgs<Writer> &args);
3619  template <class Type>
3620  void forward(ForwardArgs<Type> &args) {
3621  TMBAD_ASSERT(false);
3622  }
3623  template <class Type>
3624  void reverse(ReverseArgs<Type> &args) {
3625  TMBAD_ASSERT(false);
3626  }
3627  const char *op_name();
3628 };
3629 Scalar CondExpEq(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3630  const Scalar &x3);
3631 ad_plain CondExpEq(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3632  const ad_plain &x3);
3633 ad_aug CondExpEq(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3634  const ad_aug &x3);
// CondExpNe family: Replay, Scalar, ad_plain and ad_aug overloads plus the
// operator struct. All four-argument functions are defined elsewhere.
// NOTE(review): by name, presumably yields x2 when x0 != x1 and x3 otherwise -
// confirm in the definitions.
3635 Replay CondExpNe(const Replay &x0, const Replay &x1, const Replay &x2,
3636  const Replay &x3);
// Operator with 4 inputs and 1 output. forward/reverse are declared for
// Scalar, Replay and Writer arguments only; the generic templates hit
// TMBAD_ASSERT(false), so any other argument type is rejected.
3637 struct CondExpNeOp : global::Operator<4, 1> {
3638  void forward(ForwardArgs<Scalar> &args);
3639  void reverse(ReverseArgs<Scalar> &args);
3640  void forward(ForwardArgs<Replay> &args);
3641  void reverse(ReverseArgs<Replay> &args);
3642  void forward(ForwardArgs<Writer> &args);
3643  void reverse(ReverseArgs<Writer> &args);
3644  template <class Type>
3645  void forward(ForwardArgs<Type> &args) {
3646  TMBAD_ASSERT(false);
3647  }
3648  template <class Type>
3649  void reverse(ReverseArgs<Type> &args) {
3650  TMBAD_ASSERT(false);
3651  }
3652  const char *op_name();
3653 };
3654 Scalar CondExpNe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3655  const Scalar &x3);
3656 ad_plain CondExpNe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3657  const ad_plain &x3);
3658 ad_aug CondExpNe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3659  const ad_aug &x3);
// CondExpGt family: Replay, Scalar, ad_plain and ad_aug overloads plus the
// operator struct. All four-argument functions are defined elsewhere.
// NOTE(review): by name, presumably yields x2 when x0 > x1 and x3 otherwise -
// confirm in the definitions.
3660 Replay CondExpGt(const Replay &x0, const Replay &x1, const Replay &x2,
3661  const Replay &x3);
// Operator with 4 inputs and 1 output. forward/reverse are declared for
// Scalar, Replay and Writer arguments only; the generic templates hit
// TMBAD_ASSERT(false), so any other argument type is rejected.
3662 struct CondExpGtOp : global::Operator<4, 1> {
3663  void forward(ForwardArgs<Scalar> &args);
3664  void reverse(ReverseArgs<Scalar> &args);
3665  void forward(ForwardArgs<Replay> &args);
3666  void reverse(ReverseArgs<Replay> &args);
3667  void forward(ForwardArgs<Writer> &args);
3668  void reverse(ReverseArgs<Writer> &args);
3669  template <class Type>
3670  void forward(ForwardArgs<Type> &args) {
3671  TMBAD_ASSERT(false);
3672  }
3673  template <class Type>
3674  void reverse(ReverseArgs<Type> &args) {
3675  TMBAD_ASSERT(false);
3676  }
3677  const char *op_name();
3678 };
3679 Scalar CondExpGt(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3680  const Scalar &x3);
3681 ad_plain CondExpGt(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3682  const ad_plain &x3);
3683 ad_aug CondExpGt(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3684  const ad_aug &x3);
// CondExpLt family: Replay, Scalar, ad_plain and ad_aug overloads plus the
// operator struct. All four-argument functions are defined elsewhere.
// NOTE(review): by name, presumably yields x2 when x0 < x1 and x3 otherwise -
// confirm in the definitions.
3685 Replay CondExpLt(const Replay &x0, const Replay &x1, const Replay &x2,
3686  const Replay &x3);
// Operator with 4 inputs and 1 output. forward/reverse are declared for
// Scalar, Replay and Writer arguments only; the generic templates hit
// TMBAD_ASSERT(false), so any other argument type is rejected.
3687 struct CondExpLtOp : global::Operator<4, 1> {
3688  void forward(ForwardArgs<Scalar> &args);
3689  void reverse(ReverseArgs<Scalar> &args);
3690  void forward(ForwardArgs<Replay> &args);
3691  void reverse(ReverseArgs<Replay> &args);
3692  void forward(ForwardArgs<Writer> &args);
3693  void reverse(ReverseArgs<Writer> &args);
3694  template <class Type>
3695  void forward(ForwardArgs<Type> &args) {
3696  TMBAD_ASSERT(false);
3697  }
3698  template <class Type>
3699  void reverse(ReverseArgs<Type> &args) {
3700  TMBAD_ASSERT(false);
3701  }
3702  const char *op_name();
3703 };
3704 Scalar CondExpLt(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3705  const Scalar &x3);
3706 ad_plain CondExpLt(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3707  const ad_plain &x3);
3708 ad_aug CondExpLt(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3709  const ad_aug &x3);
// CondExpGe family: Replay, Scalar, ad_plain and ad_aug overloads plus the
// operator struct. All four-argument functions are defined elsewhere.
// NOTE(review): by name, presumably yields x2 when x0 >= x1 and x3 otherwise -
// confirm in the definitions.
3710 Replay CondExpGe(const Replay &x0, const Replay &x1, const Replay &x2,
3711  const Replay &x3);
// Operator with 4 inputs and 1 output. forward/reverse are declared for
// Scalar, Replay and Writer arguments only; the generic templates hit
// TMBAD_ASSERT(false), so any other argument type is rejected.
3712 struct CondExpGeOp : global::Operator<4, 1> {
3713  void forward(ForwardArgs<Scalar> &args);
3714  void reverse(ReverseArgs<Scalar> &args);
3715  void forward(ForwardArgs<Replay> &args);
3716  void reverse(ReverseArgs<Replay> &args);
3717  void forward(ForwardArgs<Writer> &args);
3718  void reverse(ReverseArgs<Writer> &args);
3719  template <class Type>
3720  void forward(ForwardArgs<Type> &args) {
3721  TMBAD_ASSERT(false);
3722  }
3723  template <class Type>
3724  void reverse(ReverseArgs<Type> &args) {
3725  TMBAD_ASSERT(false);
3726  }
3727  const char *op_name();
3728 };
3729 Scalar CondExpGe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3730  const Scalar &x3);
3731 ad_plain CondExpGe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3732  const ad_plain &x3);
3733 ad_aug CondExpGe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3734  const ad_aug &x3);
// CondExpLe family: Replay, Scalar, ad_plain and ad_aug overloads plus the
// operator struct. All four-argument functions are defined elsewhere.
// NOTE(review): by name, presumably yields x2 when x0 <= x1 and x3 otherwise -
// confirm in the definitions.
3735 Replay CondExpLe(const Replay &x0, const Replay &x1, const Replay &x2,
3736  const Replay &x3);
// Operator with 4 inputs and 1 output. forward/reverse are declared for
// Scalar, Replay and Writer arguments only; the generic templates hit
// TMBAD_ASSERT(false), so any other argument type is rejected.
3737 struct CondExpLeOp : global::Operator<4, 1> {
3738  void forward(ForwardArgs<Scalar> &args);
3739  void reverse(ReverseArgs<Scalar> &args);
3740  void forward(ForwardArgs<Replay> &args);
3741  void reverse(ReverseArgs<Replay> &args);
3742  void forward(ForwardArgs<Writer> &args);
3743  void reverse(ReverseArgs<Writer> &args);
3744  template <class Type>
3745  void forward(ForwardArgs<Type> &args) {
3746  TMBAD_ASSERT(false);
3747  }
3748  template <class Type>
3749  void reverse(ReverseArgs<Type> &args) {
3750  TMBAD_ASSERT(false);
3751  }
3752  const char *op_name();
3753 };
3754 Scalar CondExpLe(const Scalar &x0, const Scalar &x1, const Scalar &x2,
3755  const Scalar &x3);
3756 ad_plain CondExpLe(const ad_plain &x0, const ad_plain &x1, const ad_plain &x2,
3757  const ad_plain &x3);
3758 ad_aug CondExpLe(const ad_aug &x0, const ad_aug &x1, const ad_aug &x2,
3759  const ad_aug &x3);
3760 
3761 template <class Info>
3762 struct InfoOp : global::DynamicOperator<-1, 0> {
3763  Index n;
3764  Info info;
3765  InfoOp(Index n, Info info) : n(n), info(info) {}
3766  static const bool elimination_protected = true;
3767  static const bool add_forward_replay_copy = true;
3768  static const bool have_input_size_output_size = true;
3769  template <class Type>
3770  void forward(ForwardArgs<Type> &args) {}
3771  template <class Type>
3772  void reverse(ReverseArgs<Type> &args) {}
3773  Index input_size() const { return n; }
3774  Index output_size() const { return 0; }
3775  const char *op_name() { return "InfoOp"; }
3776  void print(global::print_config cfg) {
3777  Rcout << cfg.prefix << info << std::endl;
3778  }
3779  void *operator_data() { return &info; }
3780 };
3781 template <class Info>
3782 void addInfo(const std::vector<ad_aug> &x, const Info &info) {
3783  global::Complete<InfoOp<Info> >(x.size(), info)(x);
3784 }
3785 template <class Info>
3786 void addInfo(const std::vector<double> &x, const Info &info) {}
3787 
3788 struct SumOp : global::DynamicOperator<-1, 1> {
3789  static const bool is_linear = true;
3790  static const bool have_input_size_output_size = true;
3791  static const bool add_forward_replay_copy = true;
3792  size_t n;
3793  Index input_size() const;
3794  Index output_size() const;
3795  SumOp(size_t n);
3796  template <class Type>
3797  void forward(ForwardArgs<Type> &args) {
3798  args.y(0) = 0;
3799  for (size_t i = 0; i < n; i++) {
3800  args.y(0) += args.x(i);
3801  }
3802  }
3803  template <class Type>
3804  void reverse(ReverseArgs<Type> &args) {
3805  for (size_t i = 0; i < n; i++) {
3806  args.dx(i) += args.dy(0);
3807  }
3808  }
3809  const char *op_name();
3810 };
3811 template <class T>
3812 T sum(const std::vector<T> &x) {
3813  return global::Complete<SumOp>(x.size())(x)[0];
3814 }
3815 
3816 ad_plain logspace_sum(const std::vector<ad_plain> &x);
3817 struct LogSpaceSumOp : global::DynamicOperator<-1, 1> {
3818  size_t n;
3819  static const bool have_input_size_output_size = true;
3820  Index input_size() const;
3821  Index output_size() const;
3822  LogSpaceSumOp(size_t n);
3823  void forward(ForwardArgs<Scalar> &args);
3824  void forward(ForwardArgs<Replay> &args);
3825  template <class Type>
3826  void reverse(ReverseArgs<Type> &args) {
3827  for (size_t i = 0; i < n; i++) {
3828  args.dx(i) += exp(args.x(i) - args.y(0)) * args.dy(0);
3829  }
3830  }
3831  const char *op_name();
3832 };
3833 ad_plain logspace_sum(const std::vector<ad_plain> &x);
3834 template <class T>
3835 T logspace_sum(const std::vector<T> &x_) {
3836  std::vector<ad_plain> x(x_.begin(), x_.end());
3837  return logspace_sum(x);
3838 }
3839 
3840 ad_plain logspace_sum_stride(const std::vector<ad_plain> &x,
3841  const std::vector<Index> &stride, size_t n);
3842 struct LogSpaceSumStrideOp : global::DynamicOperator<-1, 1> {
3843  std::vector<Index> stride;
3844  size_t n;
3845  static const bool have_input_size_output_size = true;
3846 
3847  Index number_of_terms() const;
3848  template <class Type>
3849  Type &entry(Type **px, size_t i, size_t j) const {
3850  return px[j][0 + i * stride[j]];
3851  }
3852  template <class Type>
3853  Type rowsum(Type **px, size_t i) const {
3854  size_t m = stride.size();
3855  Type s = (Scalar)(0);
3856  for (size_t j = 0; j < m; j++) {
3857  s += entry(px, i, j);
3858  }
3859  return s;
3860  }
3861  Index input_size() const;
3862  Index output_size() const;
3863  LogSpaceSumStrideOp(std::vector<Index> stride, size_t n);
3864  void forward(ForwardArgs<Scalar> &args);
3865  void forward(ForwardArgs<Replay> &args);
3866  template <class Type>
3867  void reverse(ReverseArgs<Type> &args) {
3868  size_t m = stride.size();
3869  std::vector<Type *> wrk1(m);
3870  std::vector<Type *> wrk2(m);
3871  Type **px = &(wrk1[0]);
3872  Type **pdx = &(wrk2[0]);
3873  for (size_t i = 0; i < m; i++) {
3874  px[i] = args.x_ptr(i);
3875  pdx[i] = args.dx_ptr(i);
3876  }
3877  for (size_t i = 0; i < n; i++) {
3878  Type s = rowsum(px, i);
3879  Type tmp = exp(s - args.y(0)) * args.dy(0);
3880  for (size_t j = 0; j < m; j++) {
3881  entry(pdx, i, j) += tmp;
3882  }
3883  }
3884  }
3889  void dependencies(Args<> &args, Dependencies &dep) const;
3891  static const bool have_dependencies = true;
3893  static const bool implicit_dependencies = true;
3895  static const bool allow_remap = false;
3896  const char *op_name();
3897 
3898  void forward(ForwardArgs<Writer> &args);
3899  void reverse(ReverseArgs<Writer> &args);
3900 };
3901 ad_plain logspace_sum_stride(const std::vector<ad_plain> &x,
3902  const std::vector<Index> &stride, size_t n);
3903 template <class T>
3904 T logspace_sum_stride(const std::vector<T> &x_,
3905  const std::vector<Index> &stride, size_t n) {
3906  std::vector<ad_plain> x(x_.begin(), x_.end());
3907  return logspace_sum_stride(x, stride, n);
3908 }
3909 } // namespace TMBad
3910 #endif // HAVE_GLOBAL_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
segment_ref< ReverseArgs, dx_write > dx_segment(Index from, Index size)
segment version
Definition: global.hpp:344
Add zero allocated workspace to the tape.
Definition: global.hpp:2354
void reverse_decr(ReverseArgs< Writer > &args)
Source code writer.
Definition: global.hpp:2166
diff --git a/graph2dot_8hpp_source.html b/graph2dot_8hpp_source.html index fcc1715d8..5afd5a0a1 100644 --- a/graph2dot_8hpp_source.html +++ b/graph2dot_8hpp_source.html @@ -73,6 +73,6 @@
graph2dot.hpp
-
1 #ifndef HAVE_GRAPH2DOT_HPP
2 #define HAVE_GRAPH2DOT_HPP
3 // Autogenerated - do not edit by hand !
4 #include <fstream>
5 #include <iostream>
6 #include "global.hpp"
7 
8 namespace TMBad {
9 
10 void graph2dot(global glob, graph G, bool show_id = false,
11  std::ostream& cout = Rcout);
12 
13 void graph2dot(global glob, bool show_id = false, std::ostream& cout = Rcout);
14 
15 void graph2dot(const char* filename, global glob, graph G,
16  bool show_id = false);
17 
18 void graph2dot(const char* filename, global glob, bool show_id = false);
19 
20 } // namespace TMBad
21 #endif // HAVE_GRAPH2DOT_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+
1 #ifndef HAVE_GRAPH2DOT_HPP
2 #define HAVE_GRAPH2DOT_HPP
3 // Autogenerated - do not edit by hand !
4 #include <fstream>
5 #include <iostream>
6 #include "global.hpp"
7 
8 namespace TMBad {
9 
10 void graph2dot(global glob, graph G, bool show_id = false,
11  std::ostream& cout = Rcout);
12 
13 void graph2dot(global glob, bool show_id = false, std::ostream& cout = Rcout);
14 
15 void graph2dot(const char* filename, global glob, graph G,
16  bool show_id = false);
17 
18 void graph2dot(const char* filename, global glob, bool show_id = false);
19 
20 } // namespace TMBad
21 #endif // HAVE_GRAPH2DOT_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
License: GPL v2 diff --git a/graph__transform_8hpp_source.html b/graph__transform_8hpp_source.html index ea2f0e949..fd673f81a 100644 --- a/graph__transform_8hpp_source.html +++ b/graph__transform_8hpp_source.html @@ -73,7 +73,7 @@
graph_transform.hpp
-
1 #ifndef HAVE_GRAPH_TRANSFORM_HPP
2 #define HAVE_GRAPH_TRANSFORM_HPP
3 // Autogenerated - do not edit by hand !
4 #include <cstring>
5 #include <list>
6 #include <map>
7 #include "checkpoint.hpp"
8 #include "global.hpp"
9 #include "integrate.hpp"
10 #include "radix.hpp"
11 
12 namespace TMBad {
13 
18 template <class T>
19 std::vector<bool> lmatch(const std::vector<T> &x, const std::vector<T> &y) {
20  std::vector<bool> ans(x.size(), false);
21  for (size_t i = 0; i < x.size(); i++)
22  for (size_t j = 0; j < y.size(); j++) ans[i] = ans[i] || (x[i] == y[j]);
23  return ans;
24 }
25 
27 template <class I>
28 std::vector<I> which(const std::vector<bool> &x) {
29  std::vector<I> y;
30  for (size_t i = 0; i < x.size(); i++)
31  if (x[i]) y.push_back(i);
32  return y;
33 }
34 
36 std::vector<size_t> which(const std::vector<bool> &x);
37 
39 template <class T>
40 std::vector<T> subset(const std::vector<T> &x, const std::vector<bool> &y) {
41  TMBAD_ASSERT(x.size() == y.size());
42  std::vector<T> ans;
43  for (size_t i = 0; i < x.size(); i++)
44  if (y[i]) ans.push_back(x[i]);
45  return ans;
46 }
47 
49 template <class T, class I>
50 std::vector<T> subset(const std::vector<T> &x, const std::vector<I> &ind) {
51  std::vector<T> ans(ind.size());
52  for (size_t i = 0; i < ind.size(); i++) ans[i] = x[ind[i]];
53  return ans;
54 }
55 
67 template <class T, class I>
68 void make_space_inplace(std::vector<T> &x, std::vector<I> &i, T space = T(0)) {
69  std::vector<bool> mark(x.size(), false);
70  for (size_t k = 0; k < i.size(); k++) {
71  TMBAD_ASSERT(!mark[i[k]]);
72  mark[i[k]] = true;
73  }
74  std::vector<T> x_new;
75  std::vector<I> i_new;
76  for (size_t k = 0; k < x.size(); k++) {
77  if (mark[k]) {
78  x_new.push_back(space);
79  i_new.push_back(x_new.size());
80  }
81  x_new.push_back(x[k]);
82  }
83  std::swap(x, x_new);
84  std::swap(i, i_new);
85 }
86 
88 template <class T>
89 std::vector<T> invperm(const std::vector<T> &perm) {
90  std::vector<T> iperm(perm.size());
91  for (size_t i = 0; i < perm.size(); i++) iperm[perm[i]] = i;
92  return iperm;
93 }
94 
96 template <class T>
97 std::vector<size_t> match(const std::vector<T> &x, const std::vector<T> &y) {
98  return which(lmatch(x, y));
99 }
100 
102 size_t prod_int(const std::vector<size_t> &x);
103 
116 template <class T>
117 std::vector<size_t> order(std::vector<T> x) {
118  std::vector<std::pair<T, size_t> > y(x.size());
119  for (size_t i = 0; i < x.size(); i++) {
120  y[i].first = x[i];
121  y[i].second = i;
122  }
123  sort_inplace(y);
124  std::vector<size_t> z(x.size());
125  for (size_t i = 0; i < x.size(); i++) {
126  z[i] = y[i].second;
127  }
128  return z;
129 }
130 
132 std::vector<bool> reverse_boundary(global &glob, const std::vector<bool> &vars);
133 
141 std::vector<Index> get_accumulation_tree(global &glob, bool boundary = false);
142 
144 std::vector<Index> find_op_by_name(global &glob, const char *name);
145 
149 std::vector<Index> substitute(global &glob, const std::vector<Index> &seq,
150  bool inv_tags = true, bool dep_tags = true);
151 
153 std::vector<Index> substitute(global &glob, const char *name,
154  bool inv_tags = true, bool dep_tags = true);
155 
163 global accumulation_tree_split(global glob, bool sum_ = false);
164 
171 void aggregate(global &glob, int sign = 1);
172 
177 struct old_state {
178  std::vector<Index> dep_index;
179  size_t opstack_size;
180  global &glob;
181  old_state(global &glob);
182  void restore();
183 };
184 
185 std::vector<Index> remap_identical_sub_expressions(
186  global &glob, std::vector<Index> inv_remap);
187 struct term_info {
188  global &glob;
189  std::vector<Index> id;
190  std::vector<size_t> count;
191  term_info(global &glob, bool do_init = true);
192  void initialize(std::vector<Index> inv_remap = std::vector<Index>(0));
193 };
194 
195 struct gk_config {
196  bool debug;
197  bool adaptive;
198  bool nan2zero;
203  double ytol;
204  double dx;
205  gk_config();
206 };
207 
208 template <class Float = ad_adapt>
209 struct logIntegrate_t {
210  typedef Float Scalar;
211  global glob;
212  double mu, sigma, f_mu;
213  gk_config cfg;
214  double f(double x) {
215  Index k = glob.inv_index.size();
216  glob.value_inv(k - 1) = x;
217  glob.forward();
218  return glob.value_dep(0);
219  }
220  double g(double x) {
221  return (f(x + .5 * cfg.dx) - f(x - .5 * cfg.dx)) / cfg.dx;
222  }
223  double h(double x) {
224  return (g(x + .5 * cfg.dx) - g(x - .5 * cfg.dx)) / cfg.dx;
225  }
232  void rescale_integrand(const std::vector<ad_aug> &x) {
233  TMBAD_ASSERT(x.size() + 1 == glob.inv_index.size());
234  if (cfg.debug) Rcout << "rescale integrand:\n";
235  for (size_t i = 0; i < x.size(); i++) glob.value_inv(i) = x[i].Value();
236  mu = glob.value_inv(x.size());
237  f_mu = f(mu);
238  int i = 0;
239  for (; i < 100; i++) {
240  double g_mu = g(mu);
241  double h_mu = h(mu);
242  if (std::isfinite(f_mu) && !std::isfinite(h_mu)) {
243  cfg.dx = cfg.dx * .5;
244  continue;
245  }
246  double mu_new;
247  if (h_mu < 0)
248  mu_new = mu - g_mu / h_mu;
249  else
250  mu_new = mu + (g_mu > 0 ? cfg.dx : -cfg.dx);
251  double f_mu_new = f(mu_new);
252  if (cfg.debug) {
253  Rcout << "mu=" << mu << " mu_new=" << mu_new << " g_mu=" << g_mu
254  << " h_mu=" << h_mu << " f_mu=" << f_mu
255  << " f_mu_new=" << f_mu_new << "\n";
256  }
257  if (f_mu_new > f_mu + cfg.ytol) {
258  mu = mu_new;
259  f_mu = f_mu_new;
260  } else {
261  break;
262  }
263  }
264  sigma = 1. / sqrt(-h(mu));
265  if (!std::isfinite(sigma)) sigma = 10000;
266  if (cfg.debug)
267  Rcout << "==> i=" << i << " mu=" << mu << " f_mu=" << f_mu
268  << " sigma=" << sigma << "\n";
269  }
270 
271  logIntegrate_t(global &glob, gk_config cfg)
272  : glob(glob), mu(0), sigma(1), f_mu(0), cfg(cfg) {
273  TMBAD_ASSERT(glob.inv_index.size() >= 1);
274  TMBAD_ASSERT(glob.dep_index.size() == 1);
275  }
276  logIntegrate_t() {}
277  global::replay *p_replay;
278 
279  Float operator()(Float u) {
280  Index k = glob.inv_index.size();
281  p_replay->value_inv(k - 1) = sigma * u + mu;
282  p_replay->forward(false, false);
283  Float ans = exp(p_replay->value_dep(0) - f_mu);
284  if (cfg.nan2zero && ans != ans) ans = 0;
285  return ans;
286  }
287 
288  std::vector<ad_aug> operator()(const std::vector<ad_aug> &x) {
289  rescale_integrand(x);
290  global::replay replay(this->glob, *get_glob());
291  p_replay = &replay;
292  replay.start();
293  Index k = glob.inv_index.size();
294  for (Index i = 0; i < k - 1; i++) replay.value_inv(i) = x[i];
295  Float I = integrate(*this);
296  Float ans = log(I) + log(sigma) + f_mu;
297  replay.stop();
298  return std::vector<ad_aug>(1, ans);
299  }
300 };
301 
302 template <class ADFun>
303 struct integrate_subgraph {
304  global &glob;
305  std::vector<Index> random;
306  graph forward_graph;
307  graph reverse_graph;
308  std::vector<Index> var_remap;
309  std::vector<bool> mark;
310  gk_config cfg;
314  integrate_subgraph(global &glob, std::vector<Index> random,
315  gk_config cfg = gk_config())
316  : glob(glob),
317  random(random),
318  forward_graph(glob.forward_graph()),
319  reverse_graph(glob.reverse_graph()),
320  cfg(cfg) {
321  glob.subgraph_cache_ptr();
322  mark.resize(glob.opstack.size(), false);
323  }
327  global &try_integrate_variable(Index i) {
328  const std::vector<Index> &inv2op = forward_graph.inv2op;
329 
330  Index start_node = inv2op[i];
331  glob.subgraph_seq.resize(0);
332  glob.subgraph_seq.push_back(start_node);
333  forward_graph.search(glob.subgraph_seq);
334 
335  if (glob.subgraph_seq.size() == 1) return glob;
336 
337  bool any_marked = false;
338  for (Index i = 0; i < glob.subgraph_seq.size(); i++) {
339  any_marked |= mark[glob.subgraph_seq[i]];
340  if (any_marked) {
341  return glob;
342  }
343  }
344 
345  for (Index i = 0; i < glob.subgraph_seq.size(); i++) {
346  mark[glob.subgraph_seq[i]] = true;
347  }
348 
349  std::vector<Index> boundary = reverse_graph.boundary(glob.subgraph_seq);
350 
351  global new_glob;
352  var_remap.resize(glob.values.size());
353  new_glob.ad_start();
354  Index total_boundary_vars = 0;
355  std::vector<ad_plain> boundary_vars;
356  OperatorPure *constant = glob.getOperator<global::ConstOp>();
357  for (Index i = 0; i < boundary.size(); i++) {
358  Index m = glob.opstack[boundary[i]]->output_size();
359  for (Index j = 0; j < m; j++) {
360  Index boundary_var = glob.subgraph_ptr[boundary[i]].second + j;
361  var_remap[boundary_var] = total_boundary_vars;
362  total_boundary_vars++;
363  if (glob.opstack[boundary[i]] != constant) {
364  ad_plain().Independent();
365  ad_plain tmp;
366  tmp.index = boundary_var;
367  boundary_vars.push_back(tmp);
368  } else {
369  ad_plain(glob.values[boundary_var]);
370  }
371  }
372  }
373  new_glob.ad_stop();
374 
375  new_glob = glob.extract_sub(var_remap, new_glob);
376 
377  aggregate(new_glob);
378 
379  logIntegrate_t<> taped_integral(new_glob, cfg);
380 
381  glob.ad_start();
382  std::vector<ad_aug> boundary_vars2(boundary_vars.begin(),
383  boundary_vars.end());
384  if (cfg.adaptive) {
386  global::Complete<AtomOp<DTab> > taped_integral_operator(taped_integral,
387  boundary_vars2);
388  taped_integral_operator(boundary_vars)[0].Dependent();
389  } else {
390  taped_integral(boundary_vars2)[0].Dependent();
391  }
392  glob.ad_stop();
393  return glob;
394  }
395  global &gk() {
396  for (Index i = 0; i < random.size(); i++) {
397  try_integrate_variable(random[i]);
398  }
399 
400  std::vector<bool> keep_node = mark;
401  keep_node.flip();
402 
403  keep_node.resize(glob.opstack.size(), true);
404 
405  std::vector<Index> v2o = glob.var2op();
406  for (Index i = 0; i < glob.inv_index.size(); i++) {
407  keep_node[v2o[glob.inv_index[i]]] = true;
408  }
409 
410  glob.subgraph_seq.resize(0);
411  for (Index i = 0; i < keep_node.size(); i++) {
412  if (keep_node[i]) glob.subgraph_seq.push_back(i);
413  }
414 
415  glob = glob.extract_sub();
416  return glob;
417  }
418 };
419 
436  private:
437  std::vector<size_t> x;
438  std::vector<bool> mask_;
439  size_t pointer;
440  std::vector<size_t> bound;
441 
442  public:
447  size_t count();
453  multivariate_index(size_t bound_, size_t dim, bool flag = true);
458  multivariate_index(std::vector<size_t> bound, bool flag = true);
460  void flip();
462  multivariate_index &operator++();
464  operator size_t();
466  size_t index(size_t i);
468  std::vector<size_t> index();
470  std::vector<bool>::reference mask(size_t i);
472  void set_mask(const std::vector<bool> &mask);
473 };
474 
480 struct clique {
482  std::vector<Index> indices;
484  std::vector<ad_aug> logsum;
486  std::vector<size_t> dim;
487  size_t clique_size();
488  clique();
489  void subset_inplace(const std::vector<bool> &mask);
490  void logsum_init();
491  bool empty() const;
492  bool contains(Index i);
510  void get_stride(const clique &super, Index ind, std::vector<ad_plain> &offset,
511  Index &stride);
512 };
513 
520 struct sr_grid {
521  std::vector<Scalar> x;
522  std::vector<Scalar> w;
523  sr_grid();
524 
525  sr_grid(Scalar a, Scalar b, size_t n);
526 
527  sr_grid(size_t n);
528  size_t size();
529 
530  std::vector<ad_plain> logw;
531  ad_plain logw_offset();
532 };
533 
582  std::list<clique> cliques;
583  std::vector<sr_grid> grid;
584  std::vector<Index> inv2grid;
585  global &glob;
586  global new_glob;
587  std::vector<Index> random;
588  global::replay replay;
589  std::vector<bool> mark;
590  graph forward_graph;
591  graph reverse_graph;
592  std::vector<Index> var_remap;
593  const static Index NA = -1;
594  std::vector<Index> op2inv_idx;
595  std::vector<Index> op2dep_idx;
596  std::vector<bool> terms_done;
597  term_info tinfo;
598  std::map<size_t, std::vector<ad_aug> > cache;
615  sequential_reduction(global &glob, std::vector<Index> random,
616  std::vector<sr_grid> grid =
617  std::vector<sr_grid>(1, sr_grid(-20, 20, 200)),
618  std::vector<Index> random2grid = std::vector<Index>(0),
619  bool perm = true);
626  void reorder_random();
627 
628  std::vector<size_t> get_grid_bounds(std::vector<Index> inv_index);
629 
630  std::vector<sr_grid *> get_grid(std::vector<Index> inv_index);
642  std::vector<ad_aug> tabulate(std::vector<Index> inv_index, Index dep_index);
643 
665  void merge(Index i);
666 
678  void update(Index i);
679  void show_cliques();
680  void update_all();
681  ad_aug get_result();
682  global marginal();
683 };
684 
718 struct autopar {
719  global &glob;
720  graph reverse_graph;
721  size_t num_threads;
727  std::vector<std::vector<Index> > node_split;
729  std::vector<std::vector<Index> > inv_idx;
731  std::vector<std::vector<Index> > dep_idx;
733  std::vector<global> vglob;
734  autopar(global &glob, size_t num_threads);
737  std::vector<size_t> max_tree_depth();
738 
739  template <class T>
740  size_t which_min(const std::vector<T> &x) {
741  return std::min_element(x.begin(), x.end()) - x.begin();
742  }
743 
744  void run();
746  void extract();
748  size_t input_size() const;
750  size_t output_size() const;
751 };
752 
757  static const bool have_input_size_output_size = true;
759  std::vector<global> vglob;
761  std::vector<std::vector<Index> > inv_idx;
764  std::vector<std::vector<Index> > dep_idx;
765 
766  Index n, m;
767  Index input_size() const;
768  Index output_size() const;
769  ParalOp(const autopar &ap);
770 
771  template <class T>
772  void reverse(ReverseArgs<T> &args) {
773  bool parallel_operator_used_with_valid_type = false;
774  TMBAD_ASSERT(parallel_operator_used_with_valid_type);
775  }
776  static const bool add_forward_replay_copy = true;
777  template <class T>
778  void forward(ForwardArgs<T> &args) {
779  bool parallel_operator_used_with_valid_type = false;
780  TMBAD_ASSERT(parallel_operator_used_with_valid_type);
781  }
782 
783  void forward(ForwardArgs<Scalar> &args);
784  void reverse(ReverseArgs<Scalar> &args);
785  const char *op_name();
786  void print(global::print_config cfg);
787 };
788 
789 std::vector<Index> get_likely_expression_duplicates(
790  const global &glob, std::vector<Index> inv_remap);
791 
796 bool all_allow_remap(const global &glob);
797 
799 template <class T>
800 struct forbid_remap {
801  T &remap;
802  forbid_remap(T &remap) : remap(remap) {}
803  void operator()(Index a, Index b) {
804  bool ok = true;
805  for (Index i = a + 1; i <= b; i++) {
806  ok &= (remap[i] - remap[i - 1] == 1);
807  }
808  if (ok) {
809  return;
810  } else {
811  for (Index i = a; i <= b; i++) {
812  remap[i] = i;
813  }
814  }
815  }
816 };
817 
874 std::vector<Index> remap_identical_sub_expressions(
875  global &glob, std::vector<Index> inv_remap);
876 
878 
879 std::vector<Position> inv_positions(global &glob);
880 
886 void reorder_graph(global &glob, std::vector<Index> inv_idx);
887 
888 } // namespace TMBad
889 #endif // HAVE_GRAPH_TRANSFORM_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+
1 #ifndef HAVE_GRAPH_TRANSFORM_HPP
2 #define HAVE_GRAPH_TRANSFORM_HPP
3 // Autogenerated - do not edit by hand !
4 #include <cstring>
5 #include <list>
6 #include <map>
7 #include "checkpoint.hpp"
8 #include "global.hpp"
9 #include "integrate.hpp"
10 #include "radix.hpp"
11 
12 namespace TMBad {
13 
18 template <class T>
19 std::vector<bool> lmatch(const std::vector<T> &x, const std::vector<T> &y) {
20  std::vector<bool> ans(x.size(), false);
21  for (size_t i = 0; i < x.size(); i++)
22  for (size_t j = 0; j < y.size(); j++) ans[i] = ans[i] || (x[i] == y[j]);
23  return ans;
24 }
25 
27 template <class I>
28 std::vector<I> which(const std::vector<bool> &x) {
29  std::vector<I> y;
30  for (size_t i = 0; i < x.size(); i++)
31  if (x[i]) y.push_back(i);
32  return y;
33 }
34 
36 std::vector<size_t> which(const std::vector<bool> &x);
37 
39 template <class T>
40 std::vector<T> subset(const std::vector<T> &x, const std::vector<bool> &y) {
41  TMBAD_ASSERT(x.size() == y.size());
42  std::vector<T> ans;
43  for (size_t i = 0; i < x.size(); i++)
44  if (y[i]) ans.push_back(x[i]);
45  return ans;
46 }
47 
49 template <class T, class I>
50 std::vector<T> subset(const std::vector<T> &x, const std::vector<I> &ind) {
51  std::vector<T> ans(ind.size());
52  for (size_t i = 0; i < ind.size(); i++) ans[i] = x[ind[i]];
53  return ans;
54 }
55 
67 template <class T, class I>
68 void make_space_inplace(std::vector<T> &x, std::vector<I> &i, T space = T(0)) {
69  std::vector<bool> mark(x.size(), false);
70  for (size_t k = 0; k < i.size(); k++) {
71  TMBAD_ASSERT(!mark[i[k]]);
72  mark[i[k]] = true;
73  }
74  std::vector<T> x_new;
75  std::vector<I> i_new;
76  for (size_t k = 0; k < x.size(); k++) {
77  if (mark[k]) {
78  x_new.push_back(space);
79  i_new.push_back(x_new.size());
80  }
81  x_new.push_back(x[k]);
82  }
83  std::swap(x, x_new);
84  std::swap(i, i_new);
85 }
86 
88 template <class T>
89 std::vector<T> invperm(const std::vector<T> &perm) {
90  std::vector<T> iperm(perm.size());
91  for (size_t i = 0; i < perm.size(); i++) iperm[perm[i]] = i;
92  return iperm;
93 }
94 
96 template <class T>
97 std::vector<size_t> match(const std::vector<T> &x, const std::vector<T> &y) {
98  return which(lmatch(x, y));
99 }
100 
102 size_t prod_int(const std::vector<size_t> &x);
103 
116 template <class T>
117 std::vector<size_t> order(std::vector<T> x) {
118  std::vector<std::pair<T, size_t> > y(x.size());
119  for (size_t i = 0; i < x.size(); i++) {
120  y[i].first = x[i];
121  y[i].second = i;
122  }
123  sort_inplace(y);
124  std::vector<size_t> z(x.size());
125  for (size_t i = 0; i < x.size(); i++) {
126  z[i] = y[i].second;
127  }
128  return z;
129 }
130 
132 std::vector<bool> reverse_boundary(global &glob, const std::vector<bool> &vars);
133 
141 std::vector<Index> get_accumulation_tree(global &glob, bool boundary = false);
142 
144 std::vector<Index> find_op_by_name(global &glob, const char *name);
145 
149 std::vector<Index> substitute(global &glob, const std::vector<Index> &seq,
150  bool inv_tags = true, bool dep_tags = true);
151 
153 std::vector<Index> substitute(global &glob, const char *name,
154  bool inv_tags = true, bool dep_tags = true);
155 
163 global accumulation_tree_split(global glob, bool sum_ = false);
164 
171 void aggregate(global &glob, int sign = 1);
172 
177 struct old_state {
178  std::vector<Index> dep_index;
179  size_t opstack_size;
180  global &glob;
181  old_state(global &glob);
182  void restore();
183 };
184 
185 std::vector<Index> remap_identical_sub_expressions(
186  global &glob, std::vector<Index> inv_remap);
187 struct term_info {
188  global &glob;
189  std::vector<Index> id;
190  std::vector<size_t> count;
191  term_info(global &glob, bool do_init = true);
192  void initialize(std::vector<Index> inv_remap = std::vector<Index>(0));
193 };
194 
195 struct gk_config {
196  bool debug;
197  bool adaptive;
198  bool nan2zero;
203  double ytol;
204  double dx;
205  gk_config();
206 };
207 
208 template <class Float = ad_adapt>
209 struct logIntegrate_t {
210  typedef Float Scalar;
211  global glob;
212  double mu, sigma, f_mu;
213  gk_config cfg;
214  double f(double x) {
215  Index k = glob.inv_index.size();
216  glob.value_inv(k - 1) = x;
217  glob.forward();
218  return glob.value_dep(0);
219  }
220  double g(double x) {
221  return (f(x + .5 * cfg.dx) - f(x - .5 * cfg.dx)) / cfg.dx;
222  }
223  double h(double x) {
224  return (g(x + .5 * cfg.dx) - g(x - .5 * cfg.dx)) / cfg.dx;
225  }
232  void rescale_integrand(const std::vector<ad_aug> &x) {
233  TMBAD_ASSERT(x.size() + 1 == glob.inv_index.size());
234  if (cfg.debug) Rcout << "rescale integrand:\n";
235  for (size_t i = 0; i < x.size(); i++) glob.value_inv(i) = x[i].Value();
236  mu = glob.value_inv(x.size());
237  f_mu = f(mu);
238  int i = 0;
239  for (; i < 100; i++) {
240  double g_mu = g(mu);
241  double h_mu = h(mu);
242  if (std::isfinite(f_mu) && !std::isfinite(h_mu)) {
243  cfg.dx = cfg.dx * .5;
244  continue;
245  }
246  double mu_new;
247  if (h_mu < 0)
248  mu_new = mu - g_mu / h_mu;
249  else
250  mu_new = mu + (g_mu > 0 ? cfg.dx : -cfg.dx);
251  double f_mu_new = f(mu_new);
252  if (cfg.debug) {
253  Rcout << "mu=" << mu << " mu_new=" << mu_new << " g_mu=" << g_mu
254  << " h_mu=" << h_mu << " f_mu=" << f_mu
255  << " f_mu_new=" << f_mu_new << "\n";
256  }
257  if (f_mu_new > f_mu + cfg.ytol) {
258  mu = mu_new;
259  f_mu = f_mu_new;
260  } else {
261  break;
262  }
263  }
264  sigma = 1. / sqrt(-h(mu));
265  if (!std::isfinite(sigma)) sigma = 10000;
266  if (cfg.debug)
267  Rcout << "==> i=" << i << " mu=" << mu << " f_mu=" << f_mu
268  << " sigma=" << sigma << "\n";
269  }
270 
271  logIntegrate_t(global &glob, gk_config cfg)
272  : glob(glob), mu(0), sigma(1), f_mu(0), cfg(cfg) {
273  TMBAD_ASSERT(glob.inv_index.size() >= 1);
274  TMBAD_ASSERT(glob.dep_index.size() == 1);
275  }
276  logIntegrate_t() {}
277  global::replay *p_replay;
278 
279  Float operator()(Float u) {
280  Index k = glob.inv_index.size();
281  p_replay->value_inv(k - 1) = sigma * u + mu;
282  p_replay->forward(false, false);
283  Float ans = exp(p_replay->value_dep(0) - f_mu);
284  if (cfg.nan2zero && ans != ans) ans = 0;
285  return ans;
286  }
287 
288  std::vector<ad_aug> operator()(const std::vector<ad_aug> &x) {
289  rescale_integrand(x);
290  global::replay replay(this->glob, *get_glob());
291  p_replay = &replay;
292  replay.start();
293  Index k = glob.inv_index.size();
294  for (Index i = 0; i < k - 1; i++) replay.value_inv(i) = x[i];
295  Float I = integrate(*this);
296  Float ans = log(I) + log(sigma) + f_mu;
297  replay.stop();
298  return std::vector<ad_aug>(1, ans);
299  }
300 };
301 
302 template <class ADFun>
303 struct integrate_subgraph {
304  global &glob;
305  std::vector<Index> random;
306  graph forward_graph;
307  graph reverse_graph;
308  std::vector<Index> var_remap;
309  std::vector<bool> mark;
310  gk_config cfg;
314  integrate_subgraph(global &glob, std::vector<Index> random,
315  gk_config cfg = gk_config())
316  : glob(glob),
317  random(random),
318  forward_graph(glob.forward_graph()),
319  reverse_graph(glob.reverse_graph()),
320  cfg(cfg) {
321  glob.subgraph_cache_ptr();
322  mark.resize(glob.opstack.size(), false);
323  }
327  global &try_integrate_variable(Index i) {
328  const std::vector<Index> &inv2op = forward_graph.inv2op;
329 
330  Index start_node = inv2op[i];
331  glob.subgraph_seq.resize(0);
332  glob.subgraph_seq.push_back(start_node);
333  forward_graph.search(glob.subgraph_seq);
334 
335  if (glob.subgraph_seq.size() == 1) return glob;
336 
337  bool any_marked = false;
338  for (Index i = 0; i < glob.subgraph_seq.size(); i++) {
339  any_marked |= mark[glob.subgraph_seq[i]];
340  if (any_marked) {
341  return glob;
342  }
343  }
344 
345  for (Index i = 0; i < glob.subgraph_seq.size(); i++) {
346  mark[glob.subgraph_seq[i]] = true;
347  }
348 
349  std::vector<Index> boundary = reverse_graph.boundary(glob.subgraph_seq);
350 
351  global new_glob;
352  var_remap.resize(glob.values.size());
353  new_glob.ad_start();
354  Index total_boundary_vars = 0;
355  std::vector<ad_plain> boundary_vars;
356  OperatorPure *constant = glob.getOperator<global::ConstOp>();
357  for (Index i = 0; i < boundary.size(); i++) {
358  Index m = glob.opstack[boundary[i]]->output_size();
359  for (Index j = 0; j < m; j++) {
360  Index boundary_var = glob.subgraph_ptr[boundary[i]].second + j;
361  var_remap[boundary_var] = total_boundary_vars;
362  total_boundary_vars++;
363  if (glob.opstack[boundary[i]] != constant) {
364  ad_plain().Independent();
365  ad_plain tmp;
366  tmp.index = boundary_var;
367  boundary_vars.push_back(tmp);
368  } else {
369  ad_plain(glob.values[boundary_var]);
370  }
371  }
372  }
373  new_glob.ad_stop();
374 
375  new_glob = glob.extract_sub(var_remap, new_glob);
376 
377  aggregate(new_glob);
378 
379  logIntegrate_t<> taped_integral(new_glob, cfg);
380 
381  glob.ad_start();
382  std::vector<ad_aug> boundary_vars2(boundary_vars.begin(),
383  boundary_vars.end());
384  if (cfg.adaptive) {
386  global::Complete<AtomOp<DTab> > taped_integral_operator(taped_integral,
387  boundary_vars2);
388  taped_integral_operator(boundary_vars)[0].Dependent();
389  } else {
390  taped_integral(boundary_vars2)[0].Dependent();
391  }
392  glob.ad_stop();
393  return glob;
394  }
395  global &gk() {
396  for (Index i = 0; i < random.size(); i++) {
397  try_integrate_variable(random[i]);
398  }
399 
400  std::vector<bool> keep_node = mark;
401  keep_node.flip();
402 
403  keep_node.resize(glob.opstack.size(), true);
404 
405  std::vector<Index> v2o = glob.var2op();
406  for (Index i = 0; i < glob.inv_index.size(); i++) {
407  keep_node[v2o[glob.inv_index[i]]] = true;
408  }
409 
410  glob.subgraph_seq.resize(0);
411  for (Index i = 0; i < keep_node.size(); i++) {
412  if (keep_node[i]) glob.subgraph_seq.push_back(i);
413  }
414 
415  glob = glob.extract_sub();
416  return glob;
417  }
418 };
419 
436  private:
437  std::vector<size_t> x;
438  std::vector<bool> mask_;
439  size_t pointer;
440  std::vector<size_t> bound;
441 
442  public:
447  size_t count();
453  multivariate_index(size_t bound_, size_t dim, bool flag = true);
458  multivariate_index(std::vector<size_t> bound, bool flag = true);
460  void flip();
462  multivariate_index &operator++();
464  operator size_t();
466  size_t index(size_t i);
468  std::vector<size_t> index();
470  std::vector<bool>::reference mask(size_t i);
472  void set_mask(const std::vector<bool> &mask);
473 };
474 
// clique: bundles a list of indices, a vector of AD values (`logsum`) and a
// list of sizes (`dim`). Only declarations are visible here; the member
// functions are defined outside this listing.
// NOTE(review): the upstream Doxygen comments for this struct are not part of
// this listing, so the per-member notes below are inferred from names and
// types only — confirm against graph_transform.hpp.
480 struct clique {
// presumably the variable indices that belong to the clique — TODO confirm
482  std::vector<Index> indices;
// presumably tabulated log-scale values for the clique — TODO confirm
484  std::vector<ad_aug> logsum;
// presumably one grid size per entry of `indices` — TODO confirm
486  std::vector<size_t> dim;
487  size_t clique_size();
488  clique();
// `mask` is a boolean selector; the name suggests the clique is reduced in
// place to the selected entries — TODO confirm
489  void subset_inplace(const std::vector<bool> &mask);
490  void logsum_init();
491  bool empty() const;
492  bool contains(Index i);
// Outputs are returned through the reference parameters `offset` and `stride`;
// `super` is another clique passed by const reference.
510  void get_stride(const clique &super, Index ind, std::vector<ad_plain> &offset,
511  Index &stride);
512 };
513 
// sr_grid: a grid described by two Scalar vectors (`x`, `w`) plus an AD-typed
// vector `logw`. It can be constructed with no arguments, from (a, b, n), or
// from n alone. The sequential_reduction constructor declared later in this
// listing uses sr_grid(-20, 20, 200) as its default grid.
// NOTE(review): the upstream Doxygen comments are not part of this listing;
// the notes below are inferred from names only — confirm against
// graph_transform.hpp.
520 struct sr_grid {
// presumably the grid nodes — TODO confirm
521  std::vector<Scalar> x;
// presumably the weights that go with `x` — TODO confirm
522  std::vector<Scalar> w;
523  sr_grid();
524 
// presumably n points on the interval [a, b] — TODO confirm
525  sr_grid(Scalar a, Scalar b, size_t n);
526 
527  sr_grid(size_t n);
528  size_t size();
529 
// presumably log(w) placed on the tape — TODO confirm
530  std::vector<ad_plain> logw;
531  ad_plain logw_offset();
532 };
533 
582  std::list<clique> cliques;
583  std::vector<sr_grid> grid;
584  std::vector<Index> inv2grid;
585  global &glob;
586  global new_glob;
587  std::vector<Index> random;
588  global::replay replay;
589  std::vector<bool> mark;
590  graph forward_graph;
591  graph reverse_graph;
592  std::vector<Index> var_remap;
593  const static Index NA = -1;
594  std::vector<Index> op2inv_idx;
595  std::vector<Index> op2dep_idx;
596  std::vector<bool> terms_done;
597  term_info tinfo;
598  std::map<size_t, std::vector<ad_aug> > cache;
615  sequential_reduction(global &glob, std::vector<Index> random,
616  std::vector<sr_grid> grid =
617  std::vector<sr_grid>(1, sr_grid(-20, 20, 200)),
618  std::vector<Index> random2grid = std::vector<Index>(0),
619  bool perm = true);
626  void reorder_random();
627 
628  std::vector<size_t> get_grid_bounds(std::vector<Index> inv_index);
629 
630  std::vector<sr_grid *> get_grid(std::vector<Index> inv_index);
642  std::vector<ad_aug> tabulate(std::vector<Index> inv_index, Index dep_index);
643 
665  void merge(Index i);
666 
678  void update(Index i);
679  void show_cliques();
680  void update_all();
681  ad_aug get_result();
682  global marginal();
683 };
684 
718 struct autopar {
719  global &glob;
720  graph reverse_graph;
721  size_t num_threads;
727  std::vector<std::vector<Index> > node_split;
729  std::vector<std::vector<Index> > inv_idx;
731  std::vector<std::vector<Index> > dep_idx;
733  std::vector<global> vglob;
734  autopar(global &glob, size_t num_threads);
737  std::vector<size_t> max_tree_depth();
738 
739  template <class T>
740  size_t which_min(const std::vector<T> &x) {
741  return std::min_element(x.begin(), x.end()) - x.begin();
742  }
743 
744  void run();
746  void extract();
748  size_t input_size() const;
750  size_t output_size() const;
751 };
752 
757  static const bool have_input_size_output_size = true;
759  std::vector<global> vglob;
761  std::vector<std::vector<Index> > inv_idx;
764  std::vector<std::vector<Index> > dep_idx;
765 
766  Index n, m;
767  Index input_size() const;
768  Index output_size() const;
769  ParalOp(const autopar &ap);
770 
// Generic reverse sweep: unsupported. This template catches every argument
// type other than the Scalar overload declared further down, and
// unconditionally trips TMBAD_ASSERT on a flag that is always false.
// NOTE(review): presumably TMBAD_ASSERT reports the text of the failed
// expression, so the long variable name doubles as the error message — do not
// rename it without checking the macro.
771  template <class T>
772  void reverse(ReverseArgs<T> &args) {
773  bool parallel_operator_used_with_valid_type = false;
774  TMBAD_ASSERT(parallel_operator_used_with_valid_type);
775  }
776  static const bool add_forward_replay_copy = true;
// Generic forward sweep: unsupported. This template catches every argument
// type other than the Scalar overload declared further down, and
// unconditionally trips TMBAD_ASSERT on a flag that is always false.
// NOTE(review): presumably TMBAD_ASSERT reports the text of the failed
// expression, so the long variable name doubles as the error message — do not
// rename it without checking the macro.
777  template <class T>
778  void forward(ForwardArgs<T> &args) {
779  bool parallel_operator_used_with_valid_type = false;
780  TMBAD_ASSERT(parallel_operator_used_with_valid_type);
781  }
782 
783  void forward(ForwardArgs<Scalar> &args);
784  void reverse(ReverseArgs<Scalar> &args);
785  const char *op_name();
786  void print(global::print_config cfg);
787 };
788 
789 std::vector<Index> get_likely_expression_duplicates(
790  const global &glob, std::vector<Index> inv_remap);
791 
796 bool all_allow_remap(const global &glob);
797 
799 template <class T>
800 struct forbid_remap {
801  T &remap;
802  forbid_remap(T &remap) : remap(remap) {}
803  void operator()(Index a, Index b) {
804  bool ok = true;
805  for (Index i = a + 1; i <= b; i++) {
806  ok &= (remap[i] - remap[i - 1] == 1);
807  }
808  if (ok) {
809  return;
810  } else {
811  for (Index i = a; i <= b; i++) {
812  remap[i] = i;
813  }
814  }
815  }
816 };
817 
874 std::vector<Index> remap_identical_sub_expressions(
875  global &glob, std::vector<Index> inv_remap);
876 
878 
879 std::vector<Position> inv_positions(global &glob);
880 
886 void reorder_graph(global &glob, std::vector<Index> inv_idx);
887 
888 } // namespace TMBad
889 #endif // HAVE_GRAPH_TRANSFORM_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
std::vector< T > subset(const std::vector< T > &x, const std::vector< bool > &y)
Vector subset by boolean mask.
graph reverse_graph(std::vector< bool > keep_var=std::vector< bool >(0))
Construct operator graph with reverse connections.
Definition: TMBad.cpp:1584
size_t prod_int(const std::vector< size_t > &x)
Integer product function.
Definition: TMBad.cpp:3534
diff --git a/mask_8hpp_source.html b/mask_8hpp_source.html index da237a67c..40470e807 100644 --- a/mask_8hpp_source.html +++ b/mask_8hpp_source.html @@ -73,5 +73,5 @@
mask.hpp
-
1 template<int base, int n>
2 struct pow_t {
3  static const long int value = base * pow_t<base, n-1>::value;
4 };
5 template<int base>
6 struct pow_t<base, 0> {
7  static const long int value = 1;
8 };
9 template<long int mask>
10 struct mask_t {
11  template<int length, int i=0>
12  struct set_length {
13  static const int base = 8;
14  static const long int power = pow_t<base, i>::value;
15  typedef set_length<length, i+1> next_index_t;
16  static const int flag = ( (mask / power) % base ) != 0;
17  static const int count = flag + next_index_t::count;
18  static const int Id = count - 1;
19  static const int Index = length - i - 1;
20  next_index_t next_index;
21  template<class S, class T>
22  void copy(S &dest, const T &orig) {
23  dest[Index] = (flag ? orig[Id] : 0);
24  next_index.copy(dest, orig);
25  }
26  template<class S, class T>
27  void activate_derivs(S &var, T &value) {
28  var[Index] = value[Index];
29  if (flag) var[Index].setid(Id);
30  next_index.activate_derivs(var, value);
31  }
32  };
33  template<int length>
34  struct set_length<length, length> {
35  static const int count = 0;
36  void trace() { }
37  template<class S, class T>
38  void copy(S &dest, const T &orig) { }
39  template<class S, class T>
40  void activate_derivs(S &var, T &value) { }
41  };
42 };
43 
44 template<int nvar>
45 struct ADTypes {
46  typedef tiny_ad::variable<1, nvar> order1;
47  typedef tiny_ad::variable<2, nvar> order2;
48  typedef tiny_ad::variable<3, nvar> order3;
49 };
50 
51 // 'TMB_BIND_ATOMIC' depends on these:
52 #define NCHAR(x) sizeof(#x)-1
53 #define OCTAL(x) 0 ## x
54 
73 #define TMB_BIND_ATOMIC(NAME,MASK,CALL) \
74 TMB_ATOMIC_VECTOR_FUNCTION( \
75  NAME \
76  , \
77  (size_t) \
78  pow((double) \
79  atomic::mask_t<OCTAL(MASK)>::set_length< NCHAR(MASK) >::count, \
80  CppAD::Integer(tx[NCHAR(MASK)])) \
81  , \
82  int order = CppAD::Integer(tx[NCHAR(MASK)]); \
83  typedef \
84  atomic::mask_t<OCTAL(MASK)>::set_length<NCHAR(MASK)> mask_type; \
85  mask_type mask; \
86  static const int nvar = mask_type::count; \
87  atomic::tiny_vec_ref<double> tyref(&ty[0], ty.size()); \
88  if(order==0) { \
89  typedef double Float; \
90  CppAD::vector<Float> x(tx); \
91  ty[0] = CALL; \
92  } \
93  else if (order==1) { \
94  typedef typename atomic::ADTypes<nvar>::order1 Float; \
95  Float x[NCHAR(MASK)]; \
96  mask.activate_derivs(x, tx); \
97  tyref = CALL.getDeriv(); \
98  } \
99  else if (order==2) { \
100  typedef typename atomic::ADTypes<nvar>::order2 Float; \
101  Float x[NCHAR(MASK)]; \
102  mask.activate_derivs(x, tx); \
103  tyref = CALL.getDeriv(); \
104  } \
105  else if (order==3) { \
106  typedef typename atomic::ADTypes<nvar>::order3 Float; \
107  Float x[NCHAR(MASK)]; \
108  mask.activate_derivs(x, tx); \
109  tyref = CALL.getDeriv(); \
110  } \
111  else \
112  Rf_error("Order not implemented"); \
113  , \
114  typedef \
115  atomic::mask_t<OCTAL(MASK)>::set_length<NCHAR(MASK)> mask_type; \
116  mask_type mask; \
117  static const int nvar = mask_type::count; \
118  CppAD::vector<Type> tx_(tx); \
119  tx_[NCHAR(MASK)] = tx_[NCHAR(MASK)] + Type(1.0); \
120  vector<Type> tmp = NAME(tx_); \
121  matrix<Type> m = tmp.matrix(); \
122  m.resize(nvar, m.size() / nvar); \
123  vector<Type> w = py; \
124  vector<Type> px_ = m * w.matrix(); \
125  mask.copy(px, px_); \
126  px[NCHAR(MASK)] = 0; \
127  )
128 
129 // ======================================================================
130 
131 
132 #ifdef TMBAD_FRAMEWORK
133 
134 #undef TMB_BIND_ATOMIC
135 #ifndef TMB_MAX_ORDER
136 #define TMB_MAX_ORDER 3
137 #endif
138 
139 #define TMB_BIND_ATOMIC(NAME,MASK,CALL) \
140 template<int order, int ninput, int noutput, long int mask> \
141 struct NAME ## Eval { \
142  typedef typename \
143  atomic::mask_t<mask>::template set_length<ninput> mask_type; \
144  mask_type mask_; \
145  static const int nvar = mask_type::count; \
146  template <class S, class T> \
147  void operator()(S* tx, T* ty) { \
148  typedef atomic::tiny_ad::variable<order, nvar> Float; \
149  atomic::tiny_vec_ref<double> tyref(&(ty[0]), noutput); \
150  Float x[ninput]; \
151  mask_.activate_derivs(x, tx); \
152  tyref = (CALL).getDeriv(); \
153  } \
154 }; \
155 template<int ninput, int noutput, long int mask> \
156 struct NAME ## Eval<0, ninput, noutput, mask> { \
157  template <class S, class T> \
158  void operator()(S* tx, T* ty) { \
159  S* x = tx; \
160  ty[0] = (CALL); \
161  } \
162 }; \
163 template<int order, int ninput, int noutput, long int mask> \
164 struct NAME ## Op : TMBad::global::Operator<ninput, noutput> { \
165  static const bool add_forward_replay_copy = true; \
166  typedef typename \
167  atomic::mask_t<mask>::template set_length<ninput> mask_type; \
168  mask_type mask_; \
169  static const int nvar = mask_type::count; \
170  template <class S, class T> \
171  void eval(S* tx, T* ty) const { \
172  NAME ## Eval<order, ninput, noutput, mask>()(tx, ty); \
173  } \
174  std::vector<TMBad::ad_plain> \
175  add_to_tape(const std::vector<TMBad::ad_plain> &x) { \
176  TMBad::OperatorPure* pOp = TMBad::get_glob()->getOperator<NAME ## Op>(); \
177  return \
178  TMBad::get_glob()->add_to_stack<NAME ## Op>(pOp, x); \
179  } \
180  std::vector<TMBad::ad_plain> \
181  operator()(const std::vector<TMBad::ad_plain> &x) { \
182  return add_to_tape(x); \
183  } \
184  Eigen::Matrix<TMBad::ad_aug, nvar, noutput / nvar> \
185  operator()(const Eigen::Array<TMBad::ad_aug, ninput, 1> &x) { \
186  std::vector<TMBad::ad_plain> x_(&(x(0)), &(x(0)) + x.size()); \
187  Eigen::Matrix<TMBad::ad_aug, nvar, noutput / nvar> ans; \
188  std::vector<TMBad::ad_plain> y = add_to_tape(x_); \
189  for (size_t i=0; i<y.size(); i++) ans(i) = y[i]; \
190  return ans; \
191  } \
192  Eigen::Matrix<double, nvar, noutput / nvar> \
193  operator()(const Eigen::Array<double, ninput, 1> &x) { \
194  Eigen::Matrix<double, nvar, noutput / nvar> ans; \
195  eval(&(x(0)), &(ans(0))); \
196  return ans; \
197  } \
198  template<class Type> \
199  void forward(TMBad::ForwardArgs<Type> &args) { \
200  Rf_error("Un-implemented method request"); \
201  } \
202  void forward(TMBad::ForwardArgs<double> &args) { \
203  double x[ninput]; \
204  for (size_t i=0; i<ninput; i++) x[i] = args.x(i); \
205  eval(x, &(args.y(0))); \
206  } \
207  template<class Type> \
208  void reverse(TMBad::ReverseArgs<Type> &args) { \
209  Eigen::Array<Type, ninput, 1> tx; \
210  for (size_t i=0; i<ninput; i++) tx(i) = args.x(i); \
211  Eigen::Matrix<Type, noutput, 1> w; \
212  for (size_t i=0; i<noutput; i++) w(i) = args.dy(i); \
213  NAME ## Op<order+1, ninput, noutput * nvar, mask> foo; \
214  Eigen::Matrix<Type, nvar, noutput> ty; \
215  ty = foo(tx); \
216  Eigen::Matrix<Type, nvar, 1> tyw = ty * w; \
217  Type tmp[ninput]; \
218  mask_.copy(tmp, &(tyw[0])); \
219  for (size_t i=0; i<ninput; i++) args.dx(i) += tmp[i]; \
220  } \
221  void reverse(TMBad::ReverseArgs<TMBad::Writer> &args) { \
222  Rf_error("Un-implemented method request"); \
223  } \
224  const char* op_name() { return #NAME ; } \
225 }; \
226 template<int ninput, int noutput, long int mask> \
227 struct NAME ## Op<TMB_MAX_ORDER+1, ninput, noutput, mask> { \
228  typedef typename \
229  atomic::mask_t<mask>::template set_length<ninput> mask_type; \
230  mask_type mask_; \
231  static const int nvar = mask_type::count; \
232  Eigen::Matrix<TMBad::ad_aug, nvar, noutput / nvar> \
233  operator()(const Eigen::Array<TMBad::ad_aug, ninput, 1> &x) { \
234  Eigen::Matrix<TMBad::ad_aug, nvar, noutput / nvar> ans; \
235  Rf_error("Order not implemented. Please increase TMB_MAX_ORDER"); \
236  return ans; \
237  } \
238  Eigen::Matrix<double, nvar, noutput / nvar> \
239  operator()(const Eigen::Array<double, ninput, 1> &x) { \
240  Eigen::Matrix<double, nvar, noutput / nvar> ans; \
241  Rf_error("Order not implemented. Please increase TMB_MAX_ORDER"); \
242  return ans; \
243  } \
244 }; \
245 template<class dummy=void> \
246 CppAD::vector<double> \
247 NAME (const CppAD::vector<double> &x) CSKIP({ \
248  int n = x.size() - 1; \
249  int order = CppAD::Integer(x[n]); \
250  typedef NAME ## Op<0, NCHAR(MASK), 1, OCTAL(MASK)> Foo0; \
251  static const int nvar = Foo0::nvar; \
252  typedef NAME ## Op<1, NCHAR(MASK), nvar, OCTAL(MASK)> Foo1; \
253  if (order==0) { \
254  CppAD::vector<double> y(1); \
255  y[0] = CALL; \
256  return y; \
257  } \
258  else if (order==1) { \
259  Foo1 foo1; \
260  CppAD::vector<double> y(nvar); \
261  foo1.eval(&x[0], &y[0]); \
262  return y; \
263  } \
264  else { \
265  Rf_error("This interface is limited to 0th and 1st deriv order"); \
266  } \
267 }) \
268 template<class dummy=void> \
269 CppAD::vector<TMBad::ad_aug> \
270 NAME (const CppAD::vector<TMBad::ad_aug> &x) CSKIP({ \
271  bool all_constant = true; \
272  for (size_t i = 0; i<x.size(); i++) \
273  all_constant &= x[i].constant(); \
274  if (all_constant) { \
275  CppAD::vector<double> xd(x.size()); \
276  for (size_t i=0; i<xd.size(); i++) xd[i] = x[i].Value(); \
277  CppAD::vector<double> yd = NAME(xd); \
278  CppAD::vector<TMBad::ad_aug> y(yd.size()); \
279  for (size_t i=0; i<yd.size(); i++) y[i] = yd[i]; \
280  return y; \
281  } \
282  int n = x.size() - 1; \
283  int order = CppAD::Integer(x[n]); \
284  std::vector<TMBad::ad_plain> x_(&(x[0]), &(x[0]) + n); \
285  std::vector<TMBad::ad_plain> y_; \
286  typedef NAME ## Op<0, NCHAR(MASK), 1, OCTAL(MASK)> Foo0; \
287  static const int nvar = Foo0::nvar; \
288  typedef NAME ## Op<1, NCHAR(MASK), nvar, OCTAL(MASK)> Foo1; \
289  if (order==0) { \
290  Foo0 foo0; \
291  y_ = foo0(x_); \
292  } \
293  else if (order==1) { \
294  Foo1 foo1; \
295  y_ = foo1(x_); \
296  } \
297  else { \
298  Rf_error("This interface is limited to 0th and 1st deriv order"); \
299  } \
300  CppAD::vector<TMBad::ad_aug> y(y_.size()); \
301  for (size_t i=0; i<y.size(); i++) y[i] = y_[i]; \
302  return y; \
303 }) \
304 IF_TMB_PRECOMPILE( \
305 template \
306 CppAD::vector<TMBad::ad_aug> \
307 NAME<> (const CppAD::vector<TMBad::ad_aug> &x); \
308 template \
309 CppAD::vector<double> \
310 NAME<> (const CppAD::vector<double> &x); \
311 )
312 
313 #endif // TMBAD_FRAMEWORK
+
/**
 * Compile-time integer power: pow_t<base, n>::value == base^n for n >= 0.
 * The recursion ends at the n == 0 specialization.
 */
template<int base, int n>
struct pow_t {
  static const long int value = base * pow_t<base, n-1>::value;
};
template<int base>
struct pow_t<base, 0> {
  static const long int value = 1;
};

/**
 * Compile-time activity mask over a fixed number of function arguments.
 *
 * `mask` is read digit by digit in base 8. With set_length<length>, digit i
 * (counted from the least significant digit) belongs to argument number
 * `length - i - 1`, so the leftmost of `length` digits describes argument 0.
 * A non-zero digit marks the argument as active.
 *
 * All member functions are const: they depend only on compile-time constants,
 * so a mask can be used through a const object or from a const member
 * function.
 */
template<long int mask>
struct mask_t {
  /**
   * One recursion step per digit. `i` is the digit currently handled; the
   * remaining digits are handled by `next_index_t`.
   */
  template<int length, int i=0>
  struct set_length {
    static const int base = 8;
    static const long int power = pow_t<base, i>::value;
    typedef set_length<length, i+1> next_index_t;
    /** 1 if digit i of the mask is non-zero, else 0. */
    static const int flag = ( (mask / power) % base ) != 0;
    /** Number of active digits among digits i, i+1, ..., length-1. */
    static const int count = flag + next_index_t::count;
    /** Zero-based rank of this argument among the active ones (if active). */
    static const int Id = count - 1;
    /** Position of this argument in the full argument list. */
    static const int Index = length - i - 1;
    next_index_t next_index;
    /**
     * Scatter a compact vector `orig` (one entry per active argument) into
     * the full-length `dest`; inactive positions are set to 0.
     */
    template<class S, class T>
    void copy(S &dest, const T &orig) const {
      dest[Index] = (flag ? orig[Id] : 0);
      next_index.copy(dest, orig);
    }
    /**
     * Copy `value` into `var` element by element and call setid(Id) on each
     * active element.
     */
    template<class S, class T>
    void activate_derivs(S &var, T &value) const {
      var[Index] = value[Index];
      if (flag) var[Index].setid(Id);
      next_index.activate_derivs(var, value);
    }
  };
  /** Recursion terminator (i == length): no digits left, nothing to do. */
  template<int length>
  struct set_length<length, length> {
    static const int count = 0;
    void trace() const { }
    template<class S, class T>
    void copy(S &dest, const T &orig) const { }
    template<class S, class T>
    void activate_derivs(S &var, T &value) const { }
  };
};
43 
// ADTypes<nvar>: shorthand typedefs for tiny_ad::variable<order, nvar> with
// order 1, 2 and 3. The CppAD flavour of TMB_BIND_ATOMIC uses them as
// atomic::ADTypes<nvar>::order1 / order2 / order3.
44 template<int nvar>
45 struct ADTypes {
46  typedef tiny_ad::variable<1, nvar> order1;
47  typedef tiny_ad::variable<2, nvar> order2;
48  typedef tiny_ad::variable<3, nvar> order3;
49 };
50 
// 'TMB_BIND_ATOMIC' depends on these:
//   NCHAR(x): number of characters in the token x (the size of its
//             stringification minus the terminating NUL). The expansion is
//             parenthesized so it is safe inside larger expressions such as
//             2 * NCHAR(x).
//   OCTAL(x): pastes a leading 0 onto x so that it is read as an octal
//             literal.
#define NCHAR(x) (sizeof(#x)-1)
#define OCTAL(x) 0 ## x
54 
// TMB_BIND_ATOMIC(NAME, MASK, CALL) -- CppAD flavour.
// Forwards to TMB_ATOMIC_VECTOR_FUNCTION with four pieces:
//   1. the name of the atomic function, NAME;
//   2. the output dimension nvar^order. nvar is the number of non-zero digits
//      of MASK (mask_t<...>::set_length<...>::count) and order is read from
//      the extra trailing input tx[NCHAR(MASK)];
//   3. the double evaluation. Order 0 stores CALL in ty[0]. Orders 1, 2 and 3
//      copy tx into an array `x` of tiny_ad variables through
//      mask.activate_derivs and store CALL.getDeriv(). Any other order calls
//      Rf_error("Order not implemented");
//   4. the reverse sweep. It re-invokes NAME with the order slot raised by
//      one, reshapes the result to nvar rows, multiplies by py and scatters
//      the product into px via mask.copy. The px entry of the order slot is
//      set to 0.
// CALL is expanded where a local `x` and a typedef `Float` are in scope.
73 #define TMB_BIND_ATOMIC(NAME,MASK,CALL) \
74 TMB_ATOMIC_VECTOR_FUNCTION( \
75  NAME \
76  , \
77  (size_t) \
78  pow((double) \
79  atomic::mask_t<OCTAL(MASK)>::set_length< NCHAR(MASK) >::count, \
80  CppAD::Integer(tx[NCHAR(MASK)])) \
81  , \
82  int order = CppAD::Integer(tx[NCHAR(MASK)]); \
83  typedef \
84  atomic::mask_t<OCTAL(MASK)>::set_length<NCHAR(MASK)> mask_type; \
85  mask_type mask; \
86  static const int nvar = mask_type::count; \
87  atomic::tiny_vec_ref<double> tyref(&ty[0], ty.size()); \
88  if(order==0) { \
89  typedef double Float; \
90  CppAD::vector<Float> x(tx); \
91  ty[0] = CALL; \
92  } \
93  else if (order==1) { \
94  typedef typename atomic::ADTypes<nvar>::order1 Float; \
95  Float x[NCHAR(MASK)]; \
96  mask.activate_derivs(x, tx); \
97  tyref = CALL.getDeriv(); \
98  } \
99  else if (order==2) { \
100  typedef typename atomic::ADTypes<nvar>::order2 Float; \
101  Float x[NCHAR(MASK)]; \
102  mask.activate_derivs(x, tx); \
103  tyref = CALL.getDeriv(); \
104  } \
105  else if (order==3) { \
106  typedef typename atomic::ADTypes<nvar>::order3 Float; \
107  Float x[NCHAR(MASK)]; \
108  mask.activate_derivs(x, tx); \
109  tyref = CALL.getDeriv(); \
110  } \
111  else \
112  Rf_error("Order not implemented"); \
113  , \
114  typedef \
115  atomic::mask_t<OCTAL(MASK)>::set_length<NCHAR(MASK)> mask_type; \
116  mask_type mask; \
117  static const int nvar = mask_type::count; \
118  CppAD::vector<Type> tx_(tx); \
119  tx_[NCHAR(MASK)] = tx_[NCHAR(MASK)] + Type(1.0); \
120  vector<Type> tmp = NAME(tx_); \
121  matrix<Type> m = tmp.matrix(); \
122  m.resize(nvar, m.size() / nvar); \
123  vector<Type> w = py; \
124  vector<Type> px_ = m * w.matrix(); \
125  mask.copy(px, px_); \
126  px[NCHAR(MASK)] = 0; \
127  )
128 
129 // ======================================================================
130 
131 
132 #ifdef TMBAD_FRAMEWORK
133 
134 #undef TMB_BIND_ATOMIC
135 #ifndef TMB_MAX_ORDER
136 #define TMB_MAX_ORDER 3
137 #endif
138 
// TMB_BIND_ATOMIC(NAME, MASK, CALL) -- TMBad flavour (replaces the CppAD
// definition when TMBAD_FRAMEWORK is defined). One expansion generates:
//   * NAME##Eval<order, ninput, noutput, mask>: evaluates CALL. The general
//     case copies tx into an array `x` of tiny_ad::variable<order, nvar>
//     through mask_.activate_derivs and writes (CALL).getDeriv() to ty. The
//     order-0 specialization aliases x = tx and writes (CALL) to ty[0].
//   * NAME##Op<order, ninput, noutput, mask>: a
//     TMBad::global::Operator<ninput, noutput> whose double forward pass
//     calls eval(). Its reverse pass instantiates NAME##Op<order+1, ninput,
//     noutput * nvar, mask>, multiplies the result by the incoming dy and
//     scatters the product into dx via mask_.copy. The generic forward pass
//     and the Writer reverse pass call Rf_error.
//   * NAME##Op<TMB_MAX_ORDER+1, ...>: a specialization that stops the
//     order+1 recursion; both call operators raise
//     "Order not implemented. Please increase TMB_MAX_ORDER".
//   * NAME(const CppAD::vector<double>&) and
//     NAME(const CppAD::vector<TMBad::ad_aug>&): entry points that read the
//     derivative order from the last element of x and support orders 0 and 1
//     only. The ad_aug overload falls back to the double overload when every
//     input is constant. Both bodies are wrapped in CSKIP_ATOMIC(...).
//   * explicit instantiations of both overloads, wrapped in
//     IF_TMB_PRECOMPILE_ATOMICS(...).
139 #define TMB_BIND_ATOMIC(NAME,MASK,CALL) \
140 template<int order, int ninput, int noutput, long int mask> \
141 struct NAME ## Eval { \
142  typedef typename \
143  atomic::mask_t<mask>::template set_length<ninput> mask_type; \
144  mask_type mask_; \
145  static const int nvar = mask_type::count; \
146  template <class S, class T> \
147  void operator()(S* tx, T* ty) { \
148  typedef atomic::tiny_ad::variable<order, nvar> Float; \
149  atomic::tiny_vec_ref<double> tyref(&(ty[0]), noutput); \
150  Float x[ninput]; \
151  mask_.activate_derivs(x, tx); \
152  tyref = (CALL).getDeriv(); \
153  } \
154 }; \
155 template<int ninput, int noutput, long int mask> \
156 struct NAME ## Eval<0, ninput, noutput, mask> { \
157  template <class S, class T> \
158  void operator()(S* tx, T* ty) { \
159  S* x = tx; \
160  ty[0] = (CALL); \
161  } \
162 }; \
163 template<int order, int ninput, int noutput, long int mask> \
164 struct NAME ## Op : TMBad::global::Operator<ninput, noutput> { \
165  static const bool add_forward_replay_copy = true; \
166  typedef typename \
167  atomic::mask_t<mask>::template set_length<ninput> mask_type; \
168  mask_type mask_; \
169  static const int nvar = mask_type::count; \
170  template <class S, class T> \
171  void eval(S* tx, T* ty) const { \
172  NAME ## Eval<order, ninput, noutput, mask>()(tx, ty); \
173  } \
174  std::vector<TMBad::ad_plain> \
175  add_to_tape(const std::vector<TMBad::ad_plain> &x) { \
176  TMBad::OperatorPure* pOp = TMBad::get_glob()->getOperator<NAME ## Op>(); \
177  return \
178  TMBad::get_glob()->add_to_stack<NAME ## Op>(pOp, x); \
179  } \
180  std::vector<TMBad::ad_plain> \
181  operator()(const std::vector<TMBad::ad_plain> &x) { \
182  return add_to_tape(x); \
183  } \
184  Eigen::Matrix<TMBad::ad_aug, nvar, noutput / nvar> \
185  operator()(const Eigen::Array<TMBad::ad_aug, ninput, 1> &x) { \
186  std::vector<TMBad::ad_plain> x_(&(x(0)), &(x(0)) + x.size()); \
187  Eigen::Matrix<TMBad::ad_aug, nvar, noutput / nvar> ans; \
188  std::vector<TMBad::ad_plain> y = add_to_tape(x_); \
189  for (size_t i=0; i<y.size(); i++) ans(i) = y[i]; \
190  return ans; \
191  } \
192  Eigen::Matrix<double, nvar, noutput / nvar> \
193  operator()(const Eigen::Array<double, ninput, 1> &x) { \
194  Eigen::Matrix<double, nvar, noutput / nvar> ans; \
195  eval(&(x(0)), &(ans(0))); \
196  return ans; \
197  } \
198  template<class Type> \
199  void forward(TMBad::ForwardArgs<Type> &args) { \
200  Rf_error("Un-implemented method request"); \
201  } \
202  void forward(TMBad::ForwardArgs<double> &args) { \
203  double x[ninput]; \
204  for (size_t i=0; i<ninput; i++) x[i] = args.x(i); \
205  eval(x, &(args.y(0))); \
206  } \
207  template<class Type> \
208  void reverse(TMBad::ReverseArgs<Type> &args) { \
209  Eigen::Array<Type, ninput, 1> tx; \
210  for (size_t i=0; i<ninput; i++) tx(i) = args.x(i); \
211  Eigen::Matrix<Type, noutput, 1> w; \
212  for (size_t i=0; i<noutput; i++) w(i) = args.dy(i); \
213  NAME ## Op<order+1, ninput, noutput * nvar, mask> foo; \
214  Eigen::Matrix<Type, nvar, noutput> ty; \
215  ty = foo(tx); \
216  Eigen::Matrix<Type, nvar, 1> tyw = ty * w; \
217  Type tmp[ninput]; \
218  mask_.copy(tmp, &(tyw[0])); \
219  for (size_t i=0; i<ninput; i++) args.dx(i) += tmp[i]; \
220  } \
221  void reverse(TMBad::ReverseArgs<TMBad::Writer> &args) { \
222  Rf_error("Un-implemented method request"); \
223  } \
224  const char* op_name() { return #NAME ; } \
225 }; \
226 template<int ninput, int noutput, long int mask> \
227 struct NAME ## Op<TMB_MAX_ORDER+1, ninput, noutput, mask> { \
228  typedef typename \
229  atomic::mask_t<mask>::template set_length<ninput> mask_type; \
230  mask_type mask_; \
231  static const int nvar = mask_type::count; \
232  Eigen::Matrix<TMBad::ad_aug, nvar, noutput / nvar> \
233  operator()(const Eigen::Array<TMBad::ad_aug, ninput, 1> &x) { \
234  Eigen::Matrix<TMBad::ad_aug, nvar, noutput / nvar> ans; \
235  Rf_error("Order not implemented. Please increase TMB_MAX_ORDER"); \
236  return ans; \
237  } \
238  Eigen::Matrix<double, nvar, noutput / nvar> \
239  operator()(const Eigen::Array<double, ninput, 1> &x) { \
240  Eigen::Matrix<double, nvar, noutput / nvar> ans; \
241  Rf_error("Order not implemented. Please increase TMB_MAX_ORDER"); \
242  return ans; \
243  } \
244 }; \
245 template<class dummy=void> \
246 CppAD::vector<double> \
247 NAME (const CppAD::vector<double> &x) CSKIP_ATOMIC({ \
248  int n = x.size() - 1; \
249  int order = CppAD::Integer(x[n]); \
250  typedef NAME ## Op<0, NCHAR(MASK), 1, OCTAL(MASK)> Foo0; \
251  static const int nvar = Foo0::nvar; \
252  typedef NAME ## Op<1, NCHAR(MASK), nvar, OCTAL(MASK)> Foo1; \
253  if (order==0) { \
254  CppAD::vector<double> y(1); \
255  y[0] = CALL; \
256  return y; \
257  } \
258  else if (order==1) { \
259  Foo1 foo1; \
260  CppAD::vector<double> y(nvar); \
261  foo1.eval(&x[0], &y[0]); \
262  return y; \
263  } \
264  else { \
265  Rf_error("This interface is limited to 0th and 1st deriv order"); \
266  } \
267 }) \
268 template<class dummy=void> \
269 CppAD::vector<TMBad::ad_aug> \
270 NAME (const CppAD::vector<TMBad::ad_aug> &x) CSKIP_ATOMIC({ \
271  bool all_constant = true; \
272  for (size_t i = 0; i<x.size(); i++) \
273  all_constant &= x[i].constant(); \
274  if (all_constant) { \
275  CppAD::vector<double> xd(x.size()); \
276  for (size_t i=0; i<xd.size(); i++) xd[i] = x[i].Value(); \
277  CppAD::vector<double> yd = NAME(xd); \
278  CppAD::vector<TMBad::ad_aug> y(yd.size()); \
279  for (size_t i=0; i<yd.size(); i++) y[i] = yd[i]; \
280  return y; \
281  } \
282  int n = x.size() - 1; \
283  int order = CppAD::Integer(x[n]); \
284  std::vector<TMBad::ad_plain> x_(&(x[0]), &(x[0]) + n); \
285  std::vector<TMBad::ad_plain> y_; \
286  typedef NAME ## Op<0, NCHAR(MASK), 1, OCTAL(MASK)> Foo0; \
287  static const int nvar = Foo0::nvar; \
288  typedef NAME ## Op<1, NCHAR(MASK), nvar, OCTAL(MASK)> Foo1; \
289  if (order==0) { \
290  Foo0 foo0; \
291  y_ = foo0(x_); \
292  } \
293  else if (order==1) { \
294  Foo1 foo1; \
295  y_ = foo1(x_); \
296  } \
297  else { \
298  Rf_error("This interface is limited to 0th and 1st deriv order"); \
299  } \
300  CppAD::vector<TMBad::ad_aug> y(y_.size()); \
301  for (size_t i=0; i<y.size(); i++) y[i] = y_[i]; \
302  return y; \
303 }) \
304 IF_TMB_PRECOMPILE_ATOMICS( \
305 template \
306 CppAD::vector<TMBad::ad_aug> \
307 NAME<> (const CppAD::vector<TMBad::ad_aug> &x); \
308 template \
309 CppAD::vector<double> \
310 NAME<> (const CppAD::vector<double> &x); \
311 )
312 
313 #endif // TMBAD_FRAMEWORK
License: GPL v2 diff --git a/tmb__enable__header__only_8hpp_source.html b/tmb__enable__header__only_8hpp_source.html index 8036e0ccb..d10b73ad2 100644 --- a/tmb__enable__header__only_8hpp_source.html +++ b/tmb__enable__header__only_8hpp_source.html @@ -73,5 +73,5 @@
tmb_enable_header_only.hpp
-Go to the documentation of this file.
1 
4 #undef WITH_LIBTMB
5 #undef TMB_PRECOMPILE
6 #undef CSKIP
7 #undef IF_TMB_PRECOMPILE
8 #undef TMB_EXTERN
9 // Redefine
10 #define WITH_LIBTMB
11 #undef TMB_PRECOMPILE
12 #define CSKIP(...) ;
13 #define IF_TMB_PRECOMPILE(...)
14 #define TMB_EXTERN extern
+Go to the documentation of this file.
1 
// Switch the configuration macros to "declarations only" mode. Every macro is
// first undefined so the redefinitions below cannot clash with earlier
// settings.
// NOTE(review): the TMBad TMB_BIND_ATOMIC macro in this same patch wraps its
// bodies in CSKIP_ATOMIC, which is neither reset nor defined here — confirm
// where CSKIP_ATOMIC is configured.
4 #undef WITH_LIBTMB
5 #undef TMB_PRECOMPILE_ATOMICS
6 #undef CSKIP
7 #undef IF_TMB_PRECOMPILE_ATOMICS
8 #undef TMB_EXTERN
9 // Redefine
10 #define WITH_LIBTMB
11 #undef TMB_PRECOMPILE_ATOMICS
// CSKIP(...) discards its argument and leaves a lone ';', so a function body
// wrapped in CSKIP becomes a plain declaration.
12 #define CSKIP(...) ;
// Explicit instantiations wrapped in this macro are dropped.
13 #define IF_TMB_PRECOMPILE_ATOMICS(...)
14 #define TMB_EXTERN extern
License: GPL v2 diff --git a/tmb__enable__precompile_8hpp_source.html b/tmb__enable__precompile_8hpp_source.html index 55f653428..62257e44f 100644 --- a/tmb__enable__precompile_8hpp_source.html +++ b/tmb__enable__precompile_8hpp_source.html @@ -73,5 +73,5 @@
tmb_enable_precompile.hpp
-Go to the documentation of this file.
1 
4 #undef WITH_LIBTMB
5 #undef TMB_PRECOMPILE
6 #undef CSKIP
7 #undef IF_TMB_PRECOMPILE
8 #undef TMB_EXTERN
9 // Redefine
10 #undef WITH_LIBTMB
11 #define TMB_PRECOMPILE
12 #define CSKIP(...) __VA_ARGS__
13 #define IF_TMB_PRECOMPILE(...) __VA_ARGS__
14 #define TMB_EXTERN
+Go to the documentation of this file.
1 
// Switch the configuration macros to "emit definitions" mode. Every macro is
// first undefined so the redefinitions below cannot clash with earlier
// settings.
// NOTE(review): the TMBad TMB_BIND_ATOMIC macro in this same patch wraps its
// bodies in CSKIP_ATOMIC, which is neither reset nor defined here — confirm
// where CSKIP_ATOMIC is configured.
4 #undef WITH_LIBTMB
5 #undef TMB_PRECOMPILE_ATOMICS
6 #undef CSKIP
7 #undef IF_TMB_PRECOMPILE_ATOMICS
8 #undef TMB_EXTERN
9 // Redefine
10 #undef WITH_LIBTMB
11 #define TMB_PRECOMPILE_ATOMICS
// CSKIP(...) passes its argument through unchanged, so wrapped function
// bodies are kept.
12 #define CSKIP(...) __VA_ARGS__
// Explicit instantiations wrapped in this macro are emitted.
13 #define IF_TMB_PRECOMPILE_ATOMICS(...) __VA_ARGS__
// TMB_EXTERN expands to nothing in this mode.
14 #define TMB_EXTERN
License: GPL v2 diff --git a/tmbad__allow__comparison_8hpp_source.html b/tmbad__allow__comparison_8hpp_source.html index 599c9aa04..ae35f3506 100644 --- a/tmbad__allow__comparison_8hpp_source.html +++ b/tmbad__allow__comparison_8hpp_source.html @@ -73,6 +73,6 @@
tmbad_allow_comparison.hpp
-
1 #ifndef HAVE_TMBAD_ALLOW_COMPARISON_HPP
2 #define HAVE_TMBAD_ALLOW_COMPARISON_HPP
3 // Autogenerated - do not edit by hand !
4 #include "global.hpp"
5 
6 namespace TMBad {
12 bool operator<(const ad_aug &x, const ad_aug &y);
13 bool operator<(const Scalar &x, const ad_aug &y);
14 bool operator<=(const ad_aug &x, const ad_aug &y);
15 bool operator<=(const Scalar &x, const ad_aug &y);
16 bool operator>(const ad_aug &x, const ad_aug &y);
17 bool operator>(const Scalar &x, const ad_aug &y);
18 bool operator>=(const ad_aug &x, const ad_aug &y);
19 bool operator>=(const Scalar &x, const ad_aug &y);
20 bool operator==(const ad_aug &x, const ad_aug &y);
21 bool operator==(const Scalar &x, const ad_aug &y);
22 bool operator!=(const ad_aug &x, const ad_aug &y);
23 bool operator!=(const Scalar &x, const ad_aug &y);
24 
25 } // namespace TMBad
26 #endif // HAVE_TMBAD_ALLOW_COMPARISON_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:148
+
1 #ifndef HAVE_TMBAD_ALLOW_COMPARISON_HPP
2 #define HAVE_TMBAD_ALLOW_COMPARISON_HPP
3 // Autogenerated - do not edit by hand !
4 #include "global.hpp"
5 
6 namespace TMBad {
12 bool operator<(const ad_aug &x, const ad_aug &y);
13 bool operator<(const Scalar &x, const ad_aug &y);
14 bool operator<=(const ad_aug &x, const ad_aug &y);
15 bool operator<=(const Scalar &x, const ad_aug &y);
16 bool operator>(const ad_aug &x, const ad_aug &y);
17 bool operator>(const Scalar &x, const ad_aug &y);
18 bool operator>=(const ad_aug &x, const ad_aug &y);
19 bool operator>=(const Scalar &x, const ad_aug &y);
20 bool operator==(const ad_aug &x, const ad_aug &y);
21 bool operator==(const Scalar &x, const ad_aug &y);
22 bool operator!=(const ad_aug &x, const ad_aug &y);
23 bool operator!=(const Scalar &x, const ad_aug &y);
24 
25 } // namespace TMBad
26 #endif // HAVE_TMBAD_ALLOW_COMPARISON_HPP
Automatic differentiation library designed for TMB.
Definition: TMB.hpp:153
License: GPL v2 diff --git a/tmbad__atomic__macro_8hpp_source.html b/tmbad__atomic__macro_8hpp_source.html index 927991d53..31faef866 100644 --- a/tmbad__atomic__macro_8hpp_source.html +++ b/tmbad__atomic__macro_8hpp_source.html @@ -73,5 +73,5 @@
tmbad_atomic_macro.hpp
-
1 
2 
3 #define TMB_ATOMIC_VECTOR_FUNCTION_DEFINE( \
4  ATOMIC_NAME, OUTPUT_DIM, \
5  ATOMIC_DOUBLE, \
6  ATOMIC_REVERSE \
7 ) \
8 template<class dummy> \
9 CppAD::vector<TMBad::ad_aug> \
10 ATOMIC_NAME (const CppAD::vector<TMBad::ad_aug> &x); \
11 template<class dummy> \
12 CppAD::vector<double> \
13 ATOMIC_NAME (const CppAD::vector<double> &tx) CSKIP({ \
14  CppAD::vector<double> ty(OUTPUT_DIM); \
15  ATOMIC_DOUBLE; \
16  return ty; \
17 }) \
18 template<class dummy=void> \
19 struct ATOMIC_NAME ## Op : TMBad::global::DynamicInputOutputOperator { \
20  typedef TMBad::global::DynamicInputOutputOperator Base; \
21  ATOMIC_NAME ## Op (TMBad::Index n, TMBad::Index m) : Base(n, m) {} \
22  const char* op_name() { return #ATOMIC_NAME; } \
23  static const bool add_static_identifier = true; \
24  void forward(TMBad::ForwardArgs<TMBad::Scalar> _args_) { \
25  CppAD::vector<TMBad::Scalar> tx(this->input_size()); \
26  CppAD::vector<TMBad::Scalar> ty(this->output_size()); \
27  for (size_t i=0; i<tx.size(); i++) tx[i] = _args_.x(i); \
28  ATOMIC_DOUBLE; \
29  for (size_t i=0; i<ty.size(); i++) _args_.y(i) = ty[i]; \
30  } \
31  void forward(TMBad::ForwardArgs<TMBad::Replay> _args_) { \
32  CppAD::vector<TMBad::Replay> tx(this->input_size()); \
33  for (size_t i=0; i<tx.size(); i++) tx[i] = _args_.x(i); \
34  CppAD::vector<TMBad::Replay> ty = ATOMIC_NAME(tx); \
35  for (size_t i=0; i<ty.size(); i++) _args_.y(i) = ty[i]; \
36  } \
37  template<class Type> void reverse(TMBad::ReverseArgs<Type> _args_) { \
38  if (isDouble<Type>::value && \
39  this->output_size() == 1 && \
40  _args_.dy(0) == Type(0)) { \
41  return; \
42  } \
43  CppAD::vector<Type> tx(this->input_size()); \
44  CppAD::vector<Type> ty(this->output_size()); \
45  CppAD::vector<Type> px(this->input_size()); \
46  CppAD::vector<Type> py(this->output_size()); \
47  for (size_t i=0; i<tx.size(); i++) tx[i] = _args_.x(i); \
48  for (size_t i=0; i<ty.size(); i++) ty[i] = _args_.y(i); \
49  for (size_t i=0; i<py.size(); i++) py[i] = _args_.dy(i); \
50  ATOMIC_REVERSE; \
51  for (size_t i=0; i<px.size(); i++) _args_.dx(i) += px[i]; \
52  } \
53  void forward \
54  (TMBad::ForwardArgs<TMBad::Writer> &args) { TMBAD_ASSERT(false); } \
55  void reverse \
56  (TMBad::ReverseArgs<TMBad::Writer> &args) { TMBAD_ASSERT(false); } \
57 }; \
58 template<class dummy> \
59 CppAD::vector<TMBad::ad_aug> \
60 ATOMIC_NAME (const CppAD::vector<TMBad::ad_aug> &tx) CSKIP({ \
61  TMBad::Index n = tx.size(); \
62  TMBad::Index m = OUTPUT_DIM; \
63  typedef ATOMIC_NAME ## Op <> OP; \
64  bool all_constant = true; \
65  for (size_t i = 0; i<tx.size(); i++) \
66  all_constant &= tx[i].constant(); \
67  CppAD::vector<TMBad::ad_aug> ty(m); \
68  if (all_constant) { \
69  CppAD::vector<double> xd(tx.size()); \
70  for (size_t i=0; i<xd.size(); i++) xd[i] = tx[i].Value(); \
71  CppAD::vector<double> yd = ATOMIC_NAME(xd); \
72  for (size_t i=0; i<yd.size(); i++) ty[i] = yd[i]; \
73  } else { \
74  TMBad::OperatorPure* \
75  pOp = TMBad::get_glob()->getOperator<OP>(n, m); \
76  std::vector<TMBad::ad_plain> \
77  x(&tx[0], &tx[0] + tx.size()); \
78  std::vector<TMBad::ad_plain> \
79  y = TMBad::get_glob()->add_to_stack<OP>(pOp, x); \
80  for (size_t i=0; i<y.size(); i++) ty[i] = y[i]; \
81  } \
82  return ty; \
83 }) \
84 template<class dummy=void> \
85 void ATOMIC_NAME (const CppAD::vector<TMBad::ad_aug> &tx, \
86  CppAD::vector<TMBad::ad_aug> &ty) { \
87  ty = ATOMIC_NAME(tx); \
88 } \
89 IF_TMB_PRECOMPILE( \
90 template \
91 CppAD::vector<double> \
92 ATOMIC_NAME<> (const CppAD::vector<double>& tx); \
93 template \
94 CppAD::vector<TMBad::ad_aug> \
95 ATOMIC_NAME<>(const CppAD::vector<TMBad::ad_aug>& tx); \
96 )
97 
98 #define TMB_ATOMIC_STATIC_FUNCTION( \
99  ATOMIC_NAME, \
100  INPUT_SIZE, \
101  ATOMIC_DOUBLE, \
102  ATOMIC_REVERSE \
103 ) \
104 template<class dummy=void> \
105 CppAD::vector<TMBad::ad_aug> ATOMIC_NAME \
106 (const CppAD::vector<TMBad::ad_aug> &x); \
107 template<class dummy=void> \
108 CppAD::vector<double> ATOMIC_NAME \
109 (const CppAD::vector<double> &tx) CSKIP({ \
110  CppAD::vector<double> ty(1); \
111  ATOMIC_DOUBLE; \
112  return ty; \
113 }) \
114 template<class dummy=void> \
115 double ATOMIC_NAME (const double *tx) { \
116  double ty[1]; \
117  ATOMIC_DOUBLE; \
118  return ty[0]; \
119 } \
120 template<class dummy=void> \
121 TMBad::ad_aug ATOMIC_NAME (const TMBad::ad_aug *tx) { \
122  CppAD::vector<TMBad::ad_aug> tx_(INPUT_SIZE); \
123  for (size_t i=0; i<INPUT_SIZE; i++) tx_[i]=tx[i]; \
124  return ATOMIC_NAME(tx_)[0]; \
125 } \
126 template<class dummy=void> \
127 struct ATOMIC_NAME ## Op : TMBad::global::Operator<INPUT_SIZE, 1> { \
128  ATOMIC_NAME ## Op () {} \
129  const char* op_name() { return #ATOMIC_NAME; } \
130  void forward(TMBad::ForwardArgs<TMBad::Scalar> _args_) { \
131  TMBad::Scalar tx[INPUT_SIZE]; \
132  TMBad::Scalar ty[1] ; \
133  for (size_t i=0; i<INPUT_SIZE; i++) tx[i] = _args_.x(i); \
134  ATOMIC_DOUBLE; \
135  for (size_t i=0; i<1; i++) _args_.y(i) = ty[i]; \
136  } \
137  static const bool add_forward_replay_copy = true; \
138  template<class Type> void no_W_set_but_not_used(Type *p) { } \
139  template<class Type> void reverse(TMBad::ReverseArgs<Type> _args_) { \
140  Type tx[INPUT_SIZE]; \
141  Type ty[1] ; \
142  Type px[INPUT_SIZE]; \
143  Type py[1] ; \
144  no_W_set_but_not_used(tx); \
145  no_W_set_but_not_used(ty); \
146  no_W_set_but_not_used(py); \
147  for (size_t i=0; i<INPUT_SIZE; i++) tx[i] = _args_.x(i); \
148  for (size_t i=0; i<1 ; i++) ty[i] = _args_.y(i); \
149  for (size_t i=0; i<1 ; i++) py[i] = _args_.dy(i); \
150  ATOMIC_REVERSE; \
151  for (size_t i=0; i<INPUT_SIZE; i++) _args_.dx(i) += px[i]; \
152  } \
153  template<class Type> \
154  void forward \
155  (TMBad::ForwardArgs<Type> &args) { TMBAD_ASSERT(false); } \
156  void reverse \
157  (TMBad::ReverseArgs<TMBad::Writer> &args) { TMBAD_ASSERT(false); } \
158 }; \
159 template<class dummy> \
160 CppAD::vector<TMBad::ad_aug> ATOMIC_NAME \
161 (const CppAD::vector<TMBad::ad_aug> &tx) CSKIP({ \
162  TMBad::Index m = 1; \
163  typedef ATOMIC_NAME ## Op <> OP; \
164  bool all_constant = true; \
165  for (size_t i = 0; i<tx.size(); i++) \
166  all_constant &= tx[i].constant(); \
167  CppAD::vector<TMBad::ad_aug> ty(m); \
168  if (all_constant) { \
169  CppAD::vector<double> xd(tx.size()); \
170  for (size_t i=0; i<xd.size(); i++) xd[i] = tx[i].Value(); \
171  CppAD::vector<double> yd = ATOMIC_NAME(xd); \
172  for (size_t i=0; i<yd.size(); i++) ty[i] = yd[i]; \
173  } else { \
174  TMBad::OperatorPure* \
175  pOp = TMBad::get_glob()->getOperator<OP>(); \
176  std::vector<TMBad::ad_plain> \
177  x(&tx[0], &tx[0] + tx.size()); \
178  std::vector<TMBad::ad_plain> \
179  y = TMBad::get_glob()->add_to_stack<OP>(pOp, x); \
180  for (size_t i=0; i<y.size(); i++) ty[i] = y[i]; \
181  } \
182  return ty; \
183 }) \
184 template<class dummy=void> \
185 void ATOMIC_NAME (const CppAD::vector<TMBad::ad_aug> &tx, \
186  CppAD::vector<TMBad::ad_aug> &ty) { \
187  ty = ATOMIC_NAME(tx); \
188 } \
189 IF_TMB_PRECOMPILE( \
190 template \
191 CppAD::vector<double> \
192 ATOMIC_NAME<> (const CppAD::vector<double>& tx); \
193 template \
194 CppAD::vector<TMBad::ad_aug> \
195 ATOMIC_NAME<>(const CppAD::vector<TMBad::ad_aug>& tx); \
196 )
197 // Helper to forward declare atomic
198 #define TMB_ATOMIC_VECTOR_FUNCTION_DECLARE(ATOMIC_NAME) \
199 template<class dummy=void> \
200 CppAD::vector<TMBad::ad_aug> \
201 ATOMIC_NAME (const CppAD::vector<TMBad::ad_aug> &x); \
202 template<class dummy=void> \
203 CppAD::vector<double> \
204 ATOMIC_NAME (const CppAD::vector<double> &tx);
205 
217 #define TMB_ATOMIC_VECTOR_FUNCTION( \
218  ATOMIC_NAME, OUTPUT_DIM, \
219  ATOMIC_DOUBLE, \
220  ATOMIC_REVERSE \
221 ) \
222 TMB_ATOMIC_VECTOR_FUNCTION_DECLARE(ATOMIC_NAME) \
223 TMB_ATOMIC_VECTOR_FUNCTION_DEFINE( \
224  ATOMIC_NAME, OUTPUT_DIM, \
225  ATOMIC_DOUBLE, \
226  ATOMIC_REVERSE \
227 )
+
1 
2 
3 #define TMB_ATOMIC_VECTOR_FUNCTION_DEFINE( \
4  ATOMIC_NAME, OUTPUT_DIM, \
5  ATOMIC_DOUBLE, \
6  ATOMIC_REVERSE \
7 ) \
8 template<class dummy> \
9 CppAD::vector<TMBad::ad_aug> \
10 ATOMIC_NAME (const CppAD::vector<TMBad::ad_aug> &x); \
11 template<class dummy> \
12 CppAD::vector<double> \
13 ATOMIC_NAME (const CppAD::vector<double> &tx) CSKIP_ATOMIC({ \
14  CppAD::vector<double> ty(OUTPUT_DIM); \
15  ATOMIC_DOUBLE; \
16  return ty; \
17 }) \
18 template<class dummy=void> \
19 struct ATOMIC_NAME ## Op : TMBad::global::DynamicInputOutputOperator { \
20  typedef TMBad::global::DynamicInputOutputOperator Base; \
21  ATOMIC_NAME ## Op (TMBad::Index n, TMBad::Index m) : Base(n, m) {} \
22  const char* op_name() { return #ATOMIC_NAME; } \
23  static const bool add_static_identifier = true; \
24  void forward(TMBad::ForwardArgs<TMBad::Scalar> _args_) { \
25  CppAD::vector<TMBad::Scalar> tx(this->input_size()); \
26  CppAD::vector<TMBad::Scalar> ty(this->output_size()); \
27  for (size_t i=0; i<tx.size(); i++) tx[i] = _args_.x(i); \
28  ATOMIC_DOUBLE; \
29  for (size_t i=0; i<ty.size(); i++) _args_.y(i) = ty[i]; \
30  } \
31  void forward(TMBad::ForwardArgs<TMBad::Replay> _args_) { \
32  CppAD::vector<TMBad::Replay> tx(this->input_size()); \
33  for (size_t i=0; i<tx.size(); i++) tx[i] = _args_.x(i); \
34  CppAD::vector<TMBad::Replay> ty = ATOMIC_NAME(tx); \
35  for (size_t i=0; i<ty.size(); i++) _args_.y(i) = ty[i]; \
36  } \
37  template<class Type> void reverse(TMBad::ReverseArgs<Type> _args_) { \
38  if (isDouble<Type>::value && \
39  this->output_size() == 1 && \
40  _args_.dy(0) == Type(0)) { \
41  return; \
42  } \
43  CppAD::vector<Type> tx(this->input_size()); \
44  CppAD::vector<Type> ty(this->output_size()); \
45  CppAD::vector<Type> px(this->input_size()); \
46  CppAD::vector<Type> py(this->output_size()); \
47  for (size_t i=0; i<tx.size(); i++) tx[i] = _args_.x(i); \
48  for (size_t i=0; i<ty.size(); i++) ty[i] = _args_.y(i); \
49  for (size_t i=0; i<py.size(); i++) py[i] = _args_.dy(i); \
50  ATOMIC_REVERSE; \
51  for (size_t i=0; i<px.size(); i++) _args_.dx(i) += px[i]; \
52  } \
53  void forward \
54  (TMBad::ForwardArgs<TMBad::Writer> &args) { TMBAD_ASSERT(false); } \
55  void reverse \
56  (TMBad::ReverseArgs<TMBad::Writer> &args) { TMBAD_ASSERT(false); } \
57 }; \
58 template<class dummy> \
59 CppAD::vector<TMBad::ad_aug> \
60 ATOMIC_NAME (const CppAD::vector<TMBad::ad_aug> &tx) CSKIP_ATOMIC({ \
61  TMBad::Index n = tx.size(); \
62  TMBad::Index m = OUTPUT_DIM; \
63  typedef ATOMIC_NAME ## Op <> OP; \
64  bool all_constant = true; \
65  for (size_t i = 0; i<tx.size(); i++) \
66  all_constant &= tx[i].constant(); \
67  CppAD::vector<TMBad::ad_aug> ty(m); \
68  if (all_constant) { \
69  CppAD::vector<double> xd(tx.size()); \
70  for (size_t i=0; i<xd.size(); i++) xd[i] = tx[i].Value(); \
71  CppAD::vector<double> yd = ATOMIC_NAME(xd); \
72  for (size_t i=0; i<yd.size(); i++) ty[i] = yd[i]; \
73  } else { \
74  TMBad::OperatorPure* \
75  pOp = TMBad::get_glob()->getOperator<OP>(n, m); \
76  std::vector<TMBad::ad_plain> \
77  x(&tx[0], &tx[0] + tx.size()); \
78  std::vector<TMBad::ad_plain> \
79  y = TMBad::get_glob()->add_to_stack<OP>(pOp, x); \
80  for (size_t i=0; i<y.size(); i++) ty[i] = y[i]; \
81  } \
82  return ty; \
83 }) \
84 template<class dummy=void> \
85 void ATOMIC_NAME (const CppAD::vector<TMBad::ad_aug> &tx, \
86  CppAD::vector<TMBad::ad_aug> &ty) { \
87  ty = ATOMIC_NAME(tx); \
88 } \
89 IF_TMB_PRECOMPILE_ATOMICS( \
90 template \
91 CppAD::vector<double> \
92 ATOMIC_NAME<> (const CppAD::vector<double>& tx); \
93 template \
94 CppAD::vector<TMBad::ad_aug> \
95 ATOMIC_NAME<>(const CppAD::vector<TMBad::ad_aug>& tx); \
96 )
97 
98 #define TMB_ATOMIC_STATIC_FUNCTION( \
99  ATOMIC_NAME, \
100  INPUT_SIZE, \
101  ATOMIC_DOUBLE, \
102  ATOMIC_REVERSE \
103 ) \
104 template<class dummy=void> \
105 CppAD::vector<TMBad::ad_aug> ATOMIC_NAME \
106 (const CppAD::vector<TMBad::ad_aug> &x); \
107 template<class dummy=void> \
108 CppAD::vector<double> ATOMIC_NAME \
109 (const CppAD::vector<double> &tx) CSKIP_ATOMIC({ \
110  CppAD::vector<double> ty(1); \
111  ATOMIC_DOUBLE; \
112  return ty; \
113 }) \
114 template<class dummy=void> \
115 double ATOMIC_NAME (const double *tx) { \
116  double ty[1]; \
117  ATOMIC_DOUBLE; \
118  return ty[0]; \
119 } \
120 template<class dummy=void> \
121 TMBad::ad_aug ATOMIC_NAME (const TMBad::ad_aug *tx) { \
122  CppAD::vector<TMBad::ad_aug> tx_(INPUT_SIZE); \
123  for (size_t i=0; i<INPUT_SIZE; i++) tx_[i]=tx[i]; \
124  return ATOMIC_NAME(tx_)[0]; \
125 } \
126 template<class dummy=void> \
127 struct ATOMIC_NAME ## Op : TMBad::global::Operator<INPUT_SIZE, 1> { \
128  ATOMIC_NAME ## Op () {} \
129  const char* op_name() { return #ATOMIC_NAME; } \
130  void forward(TMBad::ForwardArgs<TMBad::Scalar> _args_) { \
131  TMBad::Scalar tx[INPUT_SIZE]; \
132  TMBad::Scalar ty[1] ; \
133  for (size_t i=0; i<INPUT_SIZE; i++) tx[i] = _args_.x(i); \
134  ATOMIC_DOUBLE; \
135  for (size_t i=0; i<1; i++) _args_.y(i) = ty[i]; \
136  } \
137  static const bool add_forward_replay_copy = true; \
138  template<class Type> void no_W_set_but_not_used(Type *p) { } \
139  template<class Type> void reverse(TMBad::ReverseArgs<Type> _args_) { \
140  Type tx[INPUT_SIZE]; \
141  Type ty[1] ; \
142  Type px[INPUT_SIZE]; \
143  Type py[1] ; \
144  no_W_set_but_not_used(tx); \
145  no_W_set_but_not_used(ty); \
146  no_W_set_but_not_used(py); \
147  for (size_t i=0; i<INPUT_SIZE; i++) tx[i] = _args_.x(i); \
148  for (size_t i=0; i<1 ; i++) ty[i] = _args_.y(i); \
149  for (size_t i=0; i<1 ; i++) py[i] = _args_.dy(i); \
150  ATOMIC_REVERSE; \
151  for (size_t i=0; i<INPUT_SIZE; i++) _args_.dx(i) += px[i]; \
152  } \
153  template<class Type> \
154  void forward \
155  (TMBad::ForwardArgs<Type> &args) { TMBAD_ASSERT(false); } \
156  void reverse \
157  (TMBad::ReverseArgs<TMBad::Writer> &args) { TMBAD_ASSERT(false); } \
158 }; \
159 template<class dummy> \
160 CppAD::vector<TMBad::ad_aug> ATOMIC_NAME \
161 (const CppAD::vector<TMBad::ad_aug> &tx) CSKIP_ATOMIC({ \
162  TMBad::Index m = 1; \
163  typedef ATOMIC_NAME ## Op <> OP; \
164  bool all_constant = true; \
165  for (size_t i = 0; i<tx.size(); i++) \
166  all_constant &= tx[i].constant(); \
167  CppAD::vector<TMBad::ad_aug> ty(m); \
168  if (all_constant) { \
169  CppAD::vector<double> xd(tx.size()); \
170  for (size_t i=0; i<xd.size(); i++) xd[i] = tx[i].Value(); \
171  CppAD::vector<double> yd = ATOMIC_NAME(xd); \
172  for (size_t i=0; i<yd.size(); i++) ty[i] = yd[i]; \
173  } else { \
174  TMBad::OperatorPure* \
175  pOp = TMBad::get_glob()->getOperator<OP>(); \
176  std::vector<TMBad::ad_plain> \
177  x(&tx[0], &tx[0] + tx.size()); \
178  std::vector<TMBad::ad_plain> \
179  y = TMBad::get_glob()->add_to_stack<OP>(pOp, x); \
180  for (size_t i=0; i<y.size(); i++) ty[i] = y[i]; \
181  } \
182  return ty; \
183 }) \
184 template<class dummy=void> \
185 void ATOMIC_NAME (const CppAD::vector<TMBad::ad_aug> &tx, \
186  CppAD::vector<TMBad::ad_aug> &ty) { \
187  ty = ATOMIC_NAME(tx); \
188 } \
189 IF_TMB_PRECOMPILE_ATOMICS( \
190 template \
191 CppAD::vector<double> \
192 ATOMIC_NAME<> (const CppAD::vector<double>& tx); \
193 template \
194 CppAD::vector<TMBad::ad_aug> \
195 ATOMIC_NAME<>(const CppAD::vector<TMBad::ad_aug>& tx); \
196 )
197 // Helper to forward declare atomic
198 #define TMB_ATOMIC_VECTOR_FUNCTION_DECLARE(ATOMIC_NAME) \
199 template<class dummy=void> \
200 CppAD::vector<TMBad::ad_aug> \
201 ATOMIC_NAME (const CppAD::vector<TMBad::ad_aug> &x); \
202 template<class dummy=void> \
203 CppAD::vector<double> \
204 ATOMIC_NAME (const CppAD::vector<double> &tx);
205 
217 #define TMB_ATOMIC_VECTOR_FUNCTION( \
218  ATOMIC_NAME, OUTPUT_DIM, \
219  ATOMIC_DOUBLE, \
220  ATOMIC_REVERSE \
221 ) \
222 TMB_ATOMIC_VECTOR_FUNCTION_DECLARE(ATOMIC_NAME) \
223 TMB_ATOMIC_VECTOR_FUNCTION_DEFINE( \
224  ATOMIC_NAME, OUTPUT_DIM, \
225  ATOMIC_DOUBLE, \
226  ATOMIC_REVERSE \
227 )
License: GPL v2