@@ -86,8 +86,8 @@ namespace
86
86
const int icol_x = blockIdx .x *blockDim .x + threadIdx .x ;
87
87
const int icol_y = blockIdx .y *blockDim .y + threadIdx .y ;
88
88
const int iz = blockIdx .z *blockDim .z + threadIdx .z ;
89
-
90
- if ( (icol_x < grid_cells.x ) && (icol_y < grid_cells.y ) && (iz < (grid_cells.z - 1 )) )
89
+
90
+ if ( (icol_x < grid_cells.x ) && (icol_y < grid_cells.y ) && (iz < (grid_cells.z - 1 )) )
91
91
{
92
92
const int idx = icol_x + icol_y*grid_cells.x + iz*grid_cells.y *grid_cells.x ;
93
93
const Float kext_tot = tau_tot[idx] / grid_d.z ;
@@ -96,6 +96,7 @@ namespace
96
96
const Float ksca_cld = kext_cld * ssa_cld[idx];
97
97
const Float ksca_aer = kext_aer * ssa_aer[idx];
98
98
const Float ksca_gas = kext_tot * ssa_tot[idx] - ksca_cld - ksca_aer;
99
+
99
100
k_ext[idx] = tau_tot[idx] / grid_d.z ;
100
101
101
102
scat_asy[idx].k_sca_gas = ksca_gas;
@@ -106,6 +107,7 @@ namespace
106
107
}
107
108
}
108
109
110
+
109
111
__global__
110
112
void bundles_optical_props_tod (
111
113
const Vector<int > grid_cells, const Vector<Float> grid_d, const int n_lev,
@@ -119,7 +121,7 @@ namespace
119
121
120
122
const int z_tod = grid_cells.z - 1 ;
121
123
122
- if ( (icol_x < grid_cells.x ) && (icol_y < grid_cells.y ))
124
+ if ( (icol_x < grid_cells.x ) && (icol_y < grid_cells.y ) )
123
125
{
124
126
Float tau_tot_sum = Float (0 .);
125
127
Float tausca_tot_sum = Float (0 .);
@@ -129,27 +131,29 @@ namespace
129
131
130
132
Float tausca_aer_sum = Float (0 .);
131
133
Float tauscag_aer_sum = Float (0 .);
132
-
134
+
133
135
for (int iz=z_tod; iz<n_lev; ++iz)
134
136
{
135
137
const int idx = icol_x + icol_y*grid_cells.x + iz*grid_cells.y *grid_cells.x ;
138
+
136
139
tau_tot_sum += tau_tot[idx];
137
140
tausca_tot_sum += tau_tot[idx] * ssa_tot[idx];
138
-
141
+
139
142
tausca_cld_sum += tau_cld[idx] * ssa_cld[idx];
140
143
tauscag_cld_sum += tau_cld[idx] * ssa_cld[idx] * asy_cld[idx];
141
-
144
+
142
145
tausca_aer_sum += tau_aer[idx] * ssa_aer[idx];
143
146
tauscag_aer_sum += tau_aer[idx] * ssa_aer[idx] * asy_aer[idx];
144
147
}
145
148
146
149
const int idx = icol_x + icol_y*grid_cells.x + z_tod*grid_cells.y *grid_cells.x ;
147
-
150
+
148
151
const Float kext_tot = tau_tot_sum / grid_d.z ;
149
-
152
+
150
153
const Float ksca_cld = tausca_cld_sum / grid_d.z ;
151
154
const Float ksca_aer = tausca_aer_sum / grid_d.z ;
152
155
const Float ksca_gas = tausca_tot_sum / grid_d.z - ksca_cld - ksca_aer;
156
+
153
157
k_ext[idx] = kext_tot;
154
158
155
159
scat_asy[idx].k_sca_gas = ksca_gas;
@@ -171,7 +175,7 @@ namespace
171
175
const int icol_x = blockIdx .x *blockDim .x + threadIdx .x ;
172
176
const int icol_y = blockIdx .y *blockDim .y + threadIdx .y ;
173
177
174
- if ( ( icol_x < grid_cells.x ) && ( icol_y < grid_cells.y ) )
178
+ if ( (icol_x < grid_cells.x ) && (icol_y < grid_cells.y ) )
175
179
{
176
180
const int idx = icol_x + icol_y*grid_cells.x ;
177
181
const Float flux_per_ray = toa_src / photons_per_col;
@@ -183,6 +187,7 @@ namespace
183
187
}
184
188
}
185
189
190
+
186
191
__global__
187
192
void count_to_flux_3d (
188
193
const Vector<int > grid_cells, const Float photons_per_col,
@@ -197,7 +202,9 @@ namespace
197
202
if ( ( icol_x < grid_cells.x ) && ( icol_y < grid_cells.y ) && ( iz < grid_cells.z ))
198
203
{
199
204
const int idx = icol_x + icol_y*grid_cells.x + iz*grid_cells.x *grid_cells.y ;
205
+
200
206
const Float flux_per_ray = toa_src / photons_per_col;
207
+
201
208
flux_1[idx] = count_1[idx] * flux_per_ray / grid_d.z ;
202
209
flux_2[idx] = count_2[idx] * flux_per_ray / grid_d.z ;
203
210
}
@@ -280,8 +287,8 @@ void Raytracer::trace_rays(
280
287
tau_cloud.ptr (), ssa_cloud.ptr (), asy_cloud.ptr (),
281
288
tau_aeros.ptr (), ssa_aeros.ptr (), asy_aeros.ptr (),
282
289
k_ext.ptr (), scat_asy.ptr ());
283
-
284
- // second, integrate from TOD to TOA
290
+
291
+ // second, integrate from TOD to TOA
285
292
bundles_optical_props_tod<<<grid_2d, block_2d>>> (
286
293
grid_cells, grid_d, n_lay,
287
294
tau_total.ptr (), ssa_total.ptr (),
@@ -337,37 +344,37 @@ void Raytracer::trace_rays(
337
344
// smallest power of two that is greater than or equal to the grid dimension (a minimum of 2 is currently required)
338
345
const Int qrng_grid_x = std::max (Float (2 ), pow (Float (2 .), ceil (std::log2 (Float (grid_cells.x )))) );
339
346
const Int qrng_grid_y = std::max (Float (2 ), pow (Float (2 .), ceil (std::log2 (Float (grid_cells.y )))) );
340
-
347
+
341
348
// total number of photons
342
349
const Int photons_total = photons_per_pixel * qrng_grid_x * qrng_grid_y;
343
350
344
351
// number of photons per thread; this should be a power of 2 and nonzero
345
352
Float photons_per_thread_tmp = std::max (Float (1 ), static_cast <Float>(photons_total) / (rt_kernel_grid * rt_kernel_block));
346
353
Int photons_per_thread = pow (Float (2 .), std::floor (std::log2 (photons_per_thread_tmp)));
347
-
354
+
348
355
// with a very low number of columns and photons_per_pixel, we may have too many threads each firing a single photon, so that the actual number of photons per pixel exceeds photons_per_pixel
349
356
// In that case, reduce grid and block size
350
357
Int actual_photons_per_pixel = photons_per_thread * rt_kernel_grid * rt_kernel_block / (qrng_grid_x * qrng_grid_y);
351
-
358
+
352
359
int rt_kernel_grid_size = rt_kernel_grid;
353
360
int rt_kernel_block_size = rt_kernel_block;
354
361
while ( (actual_photons_per_pixel > photons_per_pixel) )
355
362
{
356
363
if (rt_kernel_grid_size > 1 )
357
364
rt_kernel_grid_size /= 2 ;
358
365
else
359
- rt_kernel_block_size /= 2 ;
360
-
366
+ rt_kernel_block_size /= 2 ;
367
+
361
368
photons_per_thread_tmp = std::max (Float (1 ), static_cast <Float>(photons_total) / (rt_kernel_grid_size * rt_kernel_block_size));
362
369
photons_per_thread = pow (Float (2 .), std::floor (std::log2 (photons_per_thread_tmp)));
363
370
actual_photons_per_pixel = photons_per_thread * rt_kernel_grid_size * rt_kernel_block_size / (qrng_grid_x * qrng_grid_y);
364
371
}
365
-
372
+
366
373
dim3 grid (rt_kernel_grid_size);
367
374
dim3 block (rt_kernel_block_size);
368
-
375
+
369
376
const int mie_table_size = mie_cdf.size ();
370
-
377
+
371
378
const int qrng_gpt_offset = (igpt-1 ) * rt_kernel_grid_size * rt_kernel_block_size * photons_per_thread;
372
379
ray_tracer_kernel<<<grid, block,sizeof (Float)*mie_table_size>>> (
373
380
switch_independent_column,
0 commit comments