Skip to content

Commit 812c5fa

Browse files
authored
Merge pull request #36 from microhh/rt_fix
Fix the ray tracer for domains where TOD == TOA
2 parents 72dca00 + 02c1caf commit 812c5fa

File tree

6 files changed

+123
-156
lines changed

6 files changed

+123
-156
lines changed

include_rt/Fluxes_rt.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ class Fluxes_rt
5858
class Fluxes_broadband_rt : public Fluxes_rt
5959
{
6060
public:
61-
Fluxes_broadband_rt(const int ncol_x, const int ncol_y, const int nlev);
61+
Fluxes_broadband_rt(const int ncol_x, const int ncol_y, const int n_z, const int nlev);
6262
virtual ~Fluxes_broadband_rt() {};
6363

6464
virtual void net_flux();
@@ -113,7 +113,7 @@ class Fluxes_broadband_rt : public Fluxes_rt
113113
class Fluxes_byband_rt : public Fluxes_broadband_rt
114114
{
115115
public:
116-
Fluxes_byband_rt(const int ncol_x, const int ncol_y, const int nlev, const int nbnd);
116+
Fluxes_byband_rt(const int ncol_x, const int ncol_y, const int n_z, const int nlev, const int nbnd);
117117
virtual ~Fluxes_byband_rt() {};
118118

119119
virtual void reduce(

src_cuda_rt/Fluxes_rt.cu

Lines changed: 6 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -116,61 +116,8 @@ namespace
116116
}
117117
}
118118

119-
//namespace rrtmgp_kernel_launcher
120-
//{
121-
//
122-
// void sum_broadband(
123-
// int ncol, int nlev, int ngpt,
124-
// const Array<Float,3>& spectral_flux, Array<Float,2>& broadband_flux)
125-
// {
126-
// rrtmgp_kernels::sum_broadband(
127-
// &ncol, &nlev, &ngpt,
128-
// const_cast<Float*>(spectral_flux.ptr()),
129-
// broadband_flux.ptr());
130-
// }
131-
//
132-
//
133-
// void net_broadband(
134-
// int ncol, int nlev,
135-
// const Array<Float,2>& broadband_flux_dn, const Array<Float,2>& broadband_flux_up,
136-
// Array<Float,2>& broadband_flux_net)
137-
// {
138-
// rrtmgp_kernels::net_broadband_precalc(
139-
// &ncol, &nlev,
140-
// const_cast<Float*>(broadband_flux_dn.ptr()),
141-
// const_cast<Float*>(broadband_flux_up.ptr()),
142-
// broadband_flux_net.ptr());
143-
// }
144-
//
145-
//
146-
// void sum_byband(
147-
// int ncol, int nlev, int ngpt, int nbnd,
148-
// const Array<int,2>& band_lims,
149-
// const Array<Float,3>& spectral_flux,
150-
// Array<Float,3>& byband_flux)
151-
// {
152-
// rrtmgp_kernels::sum_byband(
153-
// &ncol, &nlev, &ngpt, &nbnd,
154-
// const_cast<int*>(band_lims.ptr()),
155-
// const_cast<Float*>(spectral_flux.ptr()),
156-
// byband_flux.ptr());
157-
// }
158-
//
159-
//
160-
// void net_byband(
161-
// int ncol, int nlev, int nband,
162-
// const Array<Float,3>& byband_flux_dn, const Array<Float,3>& byband_flux_up,
163-
// Array<Float,3>& byband_flux_net)
164-
// {
165-
// rrtmgp_kernels::net_byband_precalc(
166-
// &ncol, &nlev, &nband,
167-
// const_cast<Float*>(byband_flux_dn.ptr()),
168-
// const_cast<Float*>(byband_flux_up.ptr()),
169-
// byband_flux_net.ptr());
170-
// }
171-
172-
173-
Fluxes_broadband_rt::Fluxes_broadband_rt(const int ncol_x, const int ncol_y, const int nlev) :
119+
120+
Fluxes_broadband_rt::Fluxes_broadband_rt(const int ncol_x, const int ncol_y, const int n_z, const int nlev) :
174121
flux_up ({ncol_x*ncol_y, nlev}),
175122
flux_dn ({ncol_x*ncol_y, nlev}),
176123
flux_dn_dir ({ncol_x*ncol_y, nlev}),
@@ -180,8 +127,8 @@ Fluxes_broadband_rt::Fluxes_broadband_rt(const int ncol_x, const int ncol_y, con
180127
flux_sfc_dir({ncol_x, ncol_y}),
181128
flux_sfc_dif({ncol_x, ncol_y}),
182129
flux_sfc_up ({ncol_x, ncol_y}),
183-
flux_abs_dir({ncol_x, ncol_y, nlev-1}),
184-
flux_abs_dif({ncol_x, ncol_y, nlev-1})
130+
flux_abs_dir({ncol_x, ncol_y, n_z}),
131+
flux_abs_dif({ncol_x, ncol_y, n_z})
185132
{}
186133

187134

@@ -259,8 +206,8 @@ void Fluxes_broadband_rt::reduce(
259206
}
260207

261208

262-
Fluxes_byband_rt::Fluxes_byband_rt(const int ncol_x, const int ncol_y, const int nlev, const int nbnd) :
263-
Fluxes_broadband_rt(ncol_x, ncol_y, nlev),
209+
Fluxes_byband_rt::Fluxes_byband_rt(const int ncol_x, const int ncol_y, const int n_z, const int nlev, const int nbnd) :
210+
Fluxes_broadband_rt(ncol_x, ncol_y, n_z, nlev),
264211
bnd_flux_up ({ncol_x * ncol_y, nlev, nbnd}),
265212
bnd_flux_dn ({ncol_x * ncol_y, nlev, nbnd}),
266213
bnd_flux_dn_dir({ncol_x * ncol_y, nlev, nbnd}),

src_cuda_rt/Raytracer.cu

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,8 @@ namespace
8686
const int icol_x = blockIdx.x*blockDim.x + threadIdx.x;
8787
const int icol_y = blockIdx.y*blockDim.y + threadIdx.y;
8888
const int iz = blockIdx.z*blockDim.z + threadIdx.z;
89-
90-
if ( (icol_x < grid_cells.x) && (icol_y < grid_cells.y) && (iz < (grid_cells.z - 1)) )
89+
90+
if ( (icol_x < grid_cells.x) && (icol_y < grid_cells.y) && (iz < (grid_cells.z-1)) )
9191
{
9292
const int idx = icol_x + icol_y*grid_cells.x + iz*grid_cells.y*grid_cells.x;
9393
const Float kext_tot = tau_tot[idx] / grid_d.z;
@@ -96,6 +96,7 @@ namespace
9696
const Float ksca_cld = kext_cld * ssa_cld[idx];
9797
const Float ksca_aer = kext_aer * ssa_aer[idx];
9898
const Float ksca_gas = kext_tot * ssa_tot[idx] - ksca_cld - ksca_aer;
99+
99100
k_ext[idx] = tau_tot[idx] / grid_d.z;
100101

101102
scat_asy[idx].k_sca_gas = ksca_gas;
@@ -106,6 +107,7 @@ namespace
106107
}
107108
}
108109

110+
109111
__global__
110112
void bundles_optical_props_tod(
111113
const Vector<int> grid_cells, const Vector<Float> grid_d, const int n_lev,
@@ -119,7 +121,7 @@ namespace
119121

120122
const int z_tod = grid_cells.z - 1;
121123

122-
if ( (icol_x < grid_cells.x) && (icol_y < grid_cells.y))
124+
if ( (icol_x < grid_cells.x) && (icol_y < grid_cells.y) )
123125
{
124126
Float tau_tot_sum = Float(0.);
125127
Float tausca_tot_sum = Float(0.);
@@ -129,27 +131,29 @@ namespace
129131

130132
Float tausca_aer_sum = Float(0.);
131133
Float tauscag_aer_sum = Float(0.);
132-
134+
133135
for (int iz=z_tod; iz<n_lev; ++iz)
134136
{
135137
const int idx = icol_x + icol_y*grid_cells.x + iz*grid_cells.y*grid_cells.x;
138+
136139
tau_tot_sum += tau_tot[idx];
137140
tausca_tot_sum += tau_tot[idx] * ssa_tot[idx];
138-
141+
139142
tausca_cld_sum += tau_cld[idx] * ssa_cld[idx];
140143
tauscag_cld_sum += tau_cld[idx] * ssa_cld[idx] * asy_cld[idx];
141-
144+
142145
tausca_aer_sum += tau_aer[idx] * ssa_aer[idx];
143146
tauscag_aer_sum += tau_aer[idx] * ssa_aer[idx] * asy_aer[idx];
144147
}
145148

146149
const int idx = icol_x + icol_y*grid_cells.x + z_tod*grid_cells.y*grid_cells.x;
147-
150+
148151
const Float kext_tot = tau_tot_sum / grid_d.z;
149-
152+
150153
const Float ksca_cld = tausca_cld_sum / grid_d.z;
151154
const Float ksca_aer = tausca_aer_sum / grid_d.z;
152155
const Float ksca_gas = tausca_tot_sum / grid_d.z - ksca_cld - ksca_aer;
156+
153157
k_ext[idx] = kext_tot;
154158

155159
scat_asy[idx].k_sca_gas = ksca_gas;
@@ -171,7 +175,7 @@ namespace
171175
const int icol_x = blockIdx.x*blockDim.x + threadIdx.x;
172176
const int icol_y = blockIdx.y*blockDim.y + threadIdx.y;
173177

174-
if ( ( icol_x < grid_cells.x) && ( icol_y < grid_cells.y) )
178+
if ( (icol_x < grid_cells.x) && (icol_y < grid_cells.y) )
175179
{
176180
const int idx = icol_x + icol_y*grid_cells.x;
177181
const Float flux_per_ray = toa_src / photons_per_col;
@@ -183,6 +187,7 @@ namespace
183187
}
184188
}
185189

190+
186191
__global__
187192
void count_to_flux_3d(
188193
const Vector<int> grid_cells, const Float photons_per_col,
@@ -197,7 +202,9 @@ namespace
197202
if ( ( icol_x < grid_cells.x) && ( icol_y < grid_cells.y) && ( iz < grid_cells.z))
198203
{
199204
const int idx = icol_x + icol_y*grid_cells.x + iz*grid_cells.x*grid_cells.y;
205+
200206
const Float flux_per_ray = toa_src / photons_per_col;
207+
201208
flux_1[idx] = count_1[idx] * flux_per_ray / grid_d.z;
202209
flux_2[idx] = count_2[idx] * flux_per_ray / grid_d.z;
203210
}
@@ -280,8 +287,8 @@ void Raytracer::trace_rays(
280287
tau_cloud.ptr(), ssa_cloud.ptr(), asy_cloud.ptr(),
281288
tau_aeros.ptr(), ssa_aeros.ptr(), asy_aeros.ptr(),
282289
k_ext.ptr(), scat_asy.ptr());
283-
284-
// second, integrate from TOD to TOA
290+
291+
// second, integrate from TOD to TOA
285292
bundles_optical_props_tod<<<grid_2d, block_2d>>>(
286293
grid_cells, grid_d, n_lay,
287294
tau_total.ptr(), ssa_total.ptr(),
@@ -337,37 +344,37 @@ void Raytracer::trace_rays(
337344
// smallest two power that is larger than grid dimension (minimum of 2 is currently required)
338345
const Int qrng_grid_x = std::max(Float(2), pow(Float(2.), ceil(std::log2(Float(grid_cells.x)))) );
339346
const Int qrng_grid_y = std::max(Float(2), pow(Float(2.), ceil(std::log2(Float(grid_cells.y)))) );
340-
347+
341348
// total number of photons
342349
const Int photons_total = photons_per_pixel * qrng_grid_x * qrng_grid_y;
343350

344351
// number of photons per thread, this should a power of 2 and nonzero
345352
Float photons_per_thread_tmp = std::max(Float(1), static_cast<Float>(photons_total) / (rt_kernel_grid * rt_kernel_block));
346353
Int photons_per_thread = pow(Float(2.), std::floor(std::log2(photons_per_thread_tmp)));
347-
354+
348355
// with very low number of columns and photons_per_pixel, we may have too many threads firing a single photons, actually exceeding photons_per pixel
349356
// In that case, reduce grid and block size
350357
Int actual_photons_per_pixel = photons_per_thread * rt_kernel_grid * rt_kernel_block / (qrng_grid_x * qrng_grid_y);
351-
358+
352359
int rt_kernel_grid_size = rt_kernel_grid;
353360
int rt_kernel_block_size = rt_kernel_block;
354361
while ( (actual_photons_per_pixel > photons_per_pixel) )
355362
{
356363
if (rt_kernel_grid_size > 1)
357364
rt_kernel_grid_size /= 2;
358365
else
359-
rt_kernel_block_size /= 2;
360-
366+
rt_kernel_block_size /= 2;
367+
361368
photons_per_thread_tmp = std::max(Float(1), static_cast<Float>(photons_total) / (rt_kernel_grid_size * rt_kernel_block_size));
362369
photons_per_thread = pow(Float(2.), std::floor(std::log2(photons_per_thread_tmp)));
363370
actual_photons_per_pixel = photons_per_thread * rt_kernel_grid_size * rt_kernel_block_size / (qrng_grid_x * qrng_grid_y);
364371
}
365-
372+
366373
dim3 grid(rt_kernel_grid_size);
367374
dim3 block(rt_kernel_block_size);
368-
375+
369376
const int mie_table_size = mie_cdf.size();
370-
377+
371378
const int qrng_gpt_offset = (igpt-1) * rt_kernel_grid_size * rt_kernel_block_size * photons_per_thread;
372379
ray_tracer_kernel<<<grid, block,sizeof(Float)*mie_table_size>>>(
373380
switch_independent_column,

0 commit comments

Comments
 (0)