-
Notifications
You must be signed in to change notification settings - Fork 1
/
output.hh
263 lines (253 loc) · 10.4 KB
/
output.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
//
// B*B simulation of beam-beam effects in van der Meer scans at LHC.
// Copyright (C) 2019 Vladislav Balagura (balagura@cern.ch)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
// ----------------------------------------------------------------------
//
// Output data classes
//
#ifndef output_hh
#define output_hh 1
#include <vector>
#include <string>
#include <complex>
#include <atomic>
#include <memory> // for unique_ptr
#include "gzstream.hh"
#include "bb.hh"
using namespace std;
// -------------------- Output Data classes --------------------
//
// Integrals_Per_Turn accumulate overlap integrals per turn
//
// Accumulates the beam-beam overlap integral separately for every turn.
// Filled concurrently by many threads, hence the atomic accumulators below.
struct Integrals_Per_Turn {
// allocates one accumulator per turn
void resize(int n_turns);
// true if no turns have been allocated yet
bool empty();
// thread-safe: adds a contribution to the integral of turn "i_turn"
void add(int i_turn, double add);
// converts the atomic accumulators back to doubles (once) and returns them
const vector<double>& integ();
// dumps one line per turn, each starting with "prefix"
void write(ostream& os, const string& prefix);
static string name() { return "integrals_per_turn"; }
protected:
vector<double> double_integ;
bool double_converted; // true once atomic_integ has been copied to double_integ
// Unfortunately, at the moment g++ 8.3 does not support atomic<double> +=
// operations. Therefore, the double accumulators are expressed via
// atomic<long long int> (by converting double's to long long int's and back). This
// is much faster than using mutex locks in the threads.
//
// In addition, one can not use vector of atomic<long long int> because such
// atomics do not have a copy constructor. So, dynamic array created by new()
// is used instead:
unique_ptr<atomic<long long int>[] > atomic_integ;
// To not lose precision in conversions double<->long long int, the added
// doubles are multiplied by a large double constant before converting to
// long long int. Then, when all threads finish, they are divided by the same
// constant. The constant should be large enough to keep precision but
// small enough so that the accumulator does not exceed the long long int
// range. The accumulator is the sum of the terms kicker_density(x,y) *
// point_weight[i]. Since the weights point_weight[i] are normalized, they
// are all less than 1. So, sum(kicker_density(x,y) * point_weight[i]) <
// sum(kicker_density(x,y)) = 1. Therefore, the maximal long long int value,
// LLONG_MAX, is chosen as a constant.
};
//
// Avr_XY_Per_Turn - accumulate x+iy complex coordinate of the kicked bunch center-of-mass
// Accumulates the x+iy center-of-mass coordinate of the kicked bunch for
// every turn, using the same atomic<long long int> packing trick as
// Integrals_Per_Turn (x and y are stored in separate accumulators).
struct Avr_XY_Per_Turn {
void resize(int n_turns);
bool empty();
//
// sets atomic_avr_xy_converter, the double <-> long long int scale factor
void init(double max_xy_radius, int n_points);
// thread-safe: adds an x+iy contribution to turn "i_turn"
void add(int i_turn, complex<double> add);
void write(ostream& os, const string& prefix);
static string name() { return "avr_xy_per_turn"; }
Avr_XY_Per_Turn() : size(0), atomic_avr_xy_converter(0) {}
protected:
// per-turn accumulators for the real (x) and imaginary (y) parts
unique_ptr<atomic<long long int>[]> atomic_avr_x, atomic_avr_y;
int size; // number of allocated turns
// scale factor applied when packing doubles into the long long accumulators
double atomic_avr_xy_converter;
};
//
// Integrals_Per_Particle and Avr_XY_Per_Particle - overlap integrals and average
// x+iy coordinates accumulated by one macroparticle during groups of many
// turns (NO_BB, ADIABATIC, STABILIZATION, BB). I.e. the kicked bunch density
// is represented in this case by a delta-function concentrated at only one
// particle which makes, however, many betatron oscillations (many turns).
//
// Common base for per-particle accumulators, indexed as [phase][particle].
template <class T>
// T = double for Integrals_Per_Particle, complex<double> for Avr_XY_Per_Particle
struct Per_Particle_Base : protected array<vector<T>, PHASES> { // [phase][particle]
// allocates one zero-initialized entry per particle in every phase
void resize(int n_points);
// true if no particles have been allocated (resize() not called or n_points == 0)
bool empty();
// accumulates "add" into the (phase, particle) cell
void add(int phase, int particle, T add);
// divides one cell by n_turns; parallelized, so per particle
void normalize_by_n_turns(int n_turns, int phase, int particle);
// per-phase sum of cell values weighted by "particle_weights"
array<T, PHASES> weighted_average(const vector<double>& particle_weights);
// dumps one "prefix phase particle value" line per non-NaN cell
void write(ostream& os, const string& prefix);
static string name();
};
// Overlap integrals accumulated per macroparticle (T = double).
struct Integrals_Per_Particle : public Per_Particle_Base<double> {
static string name() { return "integrals_per_particle"; }
};
// Average x+iy coordinates accumulated per macroparticle (T = complex<double>).
struct Avr_XY_Per_Particle : public Per_Particle_Base<complex<double> > {
static string name() { return "avr_xy_per_particle"; }
};
// Points store raw 4D coordinates for selected turns in
// Points[turn][particle][0/1 for x/y]
//
// This vector will be filled by all threads in parallel and when all finish,
// will be written to file. Unfortunately, direct writing to the same file by
// many threads would block them, this dictates storing large v_points in
// memory.
struct Points : protected vector<vector<array<complex<double>, 2> > > { // [turn][particle][0/1 for x/y]
// sizes the storage; only one turn out of "select_one_turn_out_of" is
// kept (see n_selected_turns below)
void resize(int n_points,
int n_turns,
int select_one_turn_out_of);
using vector<vector<array<complex<double>, 2> > >::empty;
// stores the complex x and y coordinates of one particle for one turn
void add(int turn, int particle, complex<double> xZ, complex<double> yZ);
void write(ostream& os, const string& prefix);
static string name() { return "points"; }
protected:
// number of turns actually stored after the one-out-of-N selection
int n_selected_turns;
};
//
// Create a vector of the classes above (denoted generically as class Data)
// and a boolean flag write_it controlling whether the corresponding data
// should be written out.
//
// Output<Data>: the per-interaction-point data of one type, together with
// the gzipped file it is written to and a flag enabling that writing.
template<class Data>
struct Output {
Output() : write_it(false) {}
vector<Data> data; // [ip]
bool write_it; // should this data be written out?
ogzstream file; // gzipped output file
static string name() { return Data::name(); }
};
//
// Main class: std::tuple<...> of everything above.
//
// It is made as a tuple to implement some rudimentary looping over types for
// initialization and writing to files. Unfortunately, the support of
// metaprogramming in C++ is very limited. Here, the looping is performed by
// the cryptic line
//
// apply([&f](auto& ... x) { (..., f(x)); }, *((Outputs_tuple*)this));
//
// using std::apply(function, tuple) and a fold expression both appearing in
// C++17. The "function" is a lambda [&f](auto& ... x) which makes "folding"
// (... operation pack) where "operation" is just the comma-operator "," and
// the pack contains f() applied in this way to all members of the tuple.
// The function f() should be applicable to all elements of the tuple.
//
// In principle, it should be easy for the compiler to call f() sequentially
// for all tuple's elements, as it knows all the types, positions in memory
// etc., but expressing it in C++ is cumbersome.
//
// In spite of that, here, it is made generic with apply+fold in order to be
// flexible in activating / deactivating the corresponding algorithms and the
// following outputs using configuration switches, and, possibly, for adding
// new output types (or removing existing).
//
// In addition to the generic initialization + output, for the specific
// algorithms one needs to access the structures in the tuple individually
// (knowing its type). They can be done using a helper member function like
//
// get<Integrals_Per_Particle>(ip)
//
// It returns the data of Integrals_Per_Particle type for the interaction point
// "ip" (counting from zero). Integrals_Per_Particle should be a unique class
// name in the tuple (otherwise, if the tuple contains two structures of the
// same type, the compiler will produce an error, see the documentation on
// std::get(std::tuple) which is used internally).
//
// To check whether the particular class and the corresponding algorithm has
// been chosen in the configuration, one can use
//
// is_active<Integrals_Per_Particle>(ip)
//
// "ip" is needed here since the algorithms are configurable per interaction
// point.
//
// Aggregate of all output kinds, one Output<...> element per data class.
// Spelled with a "using" alias (the file already relies on C++17 features
// such as std::apply and fold expressions) rather than the older typedef.
using Outputs_tuple = tuple<Output<Integrals_Per_Turn>,
                            Output<Avr_XY_Per_Turn>,
                            Output<Integrals_Per_Particle>,
                            Output<Avr_XY_Per_Particle>,
                            Output<Points> >;
struct Outputs : public Outputs_tuple {
// configures the outputs according to "output_options" (presumably also
// opening the files in "output_dir" -- bodies are defined elsewhere);
// max_xy_radius is for the double <-> llong int converter in Avr_XY_Per_Turn
Outputs(int n_ip,
int n_points,
double max_xy_radius,
const string& output_options, // white-space separated list of options
const string& output_dir);
// re-initializes all active outputs for a new scan step
void reset(int n_ip,
int n_points,
int n_turns,
int select_one_turn_out_of);
// writes all active outputs for the given scan step
void write(int step);
// access to tuple individual components
template<class T> T& get (int ip) { return std::get<Output<T> >(*this).data[ip]; }
// should we process the data of type "T"?
template<class T> bool is_active(int ip) { return !std::get<Output<T> >(*this).data[ip].empty(); }
};
// ------------------------------------------------------------
//
// Per_Particle_Base<T> = array<vector<T>, PHASES> [phase][particle]
//
// Allocate n_points accumulators per phase, all reset to zero.
template <class T>
void Per_Particle_Base<T>::resize(int n_points) {
  // 0. converts to 0+0i when T = complex<double>
  for (auto& phase_data : *this) phase_data.assign(n_points, 0.);
}
// Empty if nothing was allocated; checking the first phase suffices,
// since resize() always sizes all phases identically.
template <class T>
bool Per_Particle_Base<T>::empty() { return this->front().empty(); }
// Accumulate one contribution into the (phase, particle) cell.
template <class T>
void Per_Particle_Base<T>::add(int phase, int particle, T add) {
  auto& cell = (*this)[phase][particle];
  cell += add;
}
// Divide one accumulated cell by the number of turns.
// Operates on a single particle so the callers can parallelize the loop.
template <class T>
void Per_Particle_Base<T>::normalize_by_n_turns(int n_turns, int phase, int particle) {
  T& cell = (*this)[phase][particle];
  cell /= n_turns;
}
// For every phase, return the sum of cell values weighted by the
// per-particle weights (i.e. the weighted average for normalized weights).
template <class T>
array<T, PHASES> Per_Particle_Base<T>::weighted_average(const vector<double>& particle_weights) {
  array<T, PHASES> result;
  result.fill(T(0.));
  for (int phase = 0; phase < PHASES; ++phase) {
    const vector<T>& values = (*this)[phase];
    for (size_t i = 0; i < values.size(); ++i)
      result[phase] += particle_weights[i] * values[i];
  }
  return result;
}
// operator<<(ostream&, complex<double>) is already overloaded in C++ standard
// and prints (real,imag) instead of "real imag" needed here. Use "dump"
// below instead; this double overload lets the same call site handle both.
inline void dump(std::ostream& os, double x) { os << x; }
// Print a complex number as "real imag" (space-separated, no parentheses).
inline void dump(std::ostream& os, std::complex<double> x) { os << x.real() << " " << x.imag(); }
// A complex value is NaN if either of its components is; std::isnan
// itself only covers the real floating-point types.
inline bool isnan(std::complex<double> x) { return std::isnan(x.real()) || std::isnan(x.imag()); }
//
// Write one "prefix phase particle value" line per accumulated cell,
// skipping NaN values.
template <class T>
void Per_Particle_Base<T>::write(ostream& os, const string& prefix) {
  for (int phase = 0; phase < PHASES; ++phase) {
    const vector<T>& values = (*this)[phase];
    for (size_t particle = 0; particle < values.size(); ++particle) {
      T value = values[particle];
      // if in some phase n_turn=0, normalization to such n_turns gives nan
      if (isnan(value)) continue;
      os << prefix << " " << phase << " " << particle << " ";
      dump(os, value);
      os << '\n';
    }
  }
}
#endif