Skip to content

Commit

Permalink
Merge pull request #47 from MennoVeerman/main
Browse files Browse the repository at this point in the history
new Mie LUTs with different sizes
  • Loading branch information
Chiil authored Jan 17, 2025
2 parents a0f96ac + 751edd6 commit 626fe30
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 81 deletions.
Binary file modified data/mie_lut_broadband.nc
Binary file not shown.
Binary file modified data/mie_lut_visualisation.nc
Binary file not shown.
17 changes: 9 additions & 8 deletions include_rt_kernels/raytracer_kernels_bw.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,20 +117,21 @@ void ray_tracer_kernel_bw(
const Float* __restrict__ mie_ang,
const Float* __restrict__ mie_phase,
const Float* __restrict__ mie_phase_ang,
const int mie_table_size);

const int mie_cdf_table_size,
const int mie_phase_table_size);

__global__
void accumulate_clouds_kernel(
const Float* __restrict__ lwp,
const Float* __restrict__ iwp,
const Float* __restrict__ lwp,
const Float* __restrict__ iwp,
const Float* __restrict__ tau_cloud,
const Vector<Float> grid_d,
const Vector<Float> grid_size,
const Vector<int> grid_cells,
Float* __restrict__ liwp_cam,
Float* __restrict__ tauc_cam,
Float* __restrict__ dist_cam,
Float* __restrict__ zen_cam,
Float* __restrict__ liwp_cam,
Float* __restrict__ tauc_cam,
Float* __restrict__ dist_cam,
Float* __restrict__ zen_cam,
const Camera camera);

#endif
47 changes: 26 additions & 21 deletions src_cuda_rt/Raytracer_bw.cu
Original file line number Diff line number Diff line change
Expand Up @@ -99,17 +99,17 @@ namespace

const int x0 = grid_x*fx;
const int x1 = min(grid_cells.x-1, int(floor((grid_x+1)*fx)));

const int y0 = grid_y*fy;
const int y1 = min(grid_cells.y-1, int(floor((grid_y+1)*fy)));

const int z0 = grid_z*fz;
const int z1 = min(grid_cells.z-1, int(floor((grid_z+1)*fz)));

const int ijk_grid = grid_x + grid_y*kn_grid.x + grid_z*kn_grid.y*kn_grid.x;
Float k_null_min = Float(1e15); // just a ridicilously high value
Float k_null_max = Float(0.);

for (int k=z0; k<=z1; ++k)
for (int j=y0; j<=y1; ++j)
for (int i=x0; i<=x1; ++i)
Expand Down Expand Up @@ -460,7 +460,7 @@ void Raytracer_bw::trace_rays(

Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, camera_count.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, shot_count.ptr());

counter.fill(Int(0));

// domain sizes
Expand All @@ -473,9 +473,10 @@ void Raytracer_bw::trace_rays(

dim3 grid{bw_kernel_grid}, block{bw_kernel_block};

const int mie_table_size = mie_cdf.size();
const int mie_cdf_table_size = mie_cdf.size();
const int mie_phase_table_size = mie_phase_ang.size();

ray_tracer_kernel_bw<<<grid, block, nbg*sizeof(Float) + 2 * sizeof(Float)*mie_table_size>>>(
ray_tracer_kernel_bw<<<grid, block, nbg*sizeof(Float)+ sizeof(Float)*(mie_cdf_table_size+mie_phase_table_size)>>>(
igpt-1,
photons_per_pixel, k_null_grid.ptr(),
camera_count.ptr(),
Expand All @@ -491,7 +492,9 @@ void Raytracer_bw::trace_rays(
grid_size, grid_d, grid_cells, kn_grid,
sun_direction, camera, nbg,
mie_cdf.ptr(), mie_ang.ptr(),
mie_phase.ptr(), mie_phase_ang.ptr(), mie_table_size);
mie_phase.ptr(), mie_phase_ang.ptr(),
mie_cdf_table_size,
mie_phase_table_size);

//// convert counts to fluxes
const int block_cam_x = 8;
Expand Down Expand Up @@ -621,9 +624,10 @@ void Raytracer_bw::trace_rays_bb(

dim3 grid{bw_kernel_grid}, block{bw_kernel_block};

const int mie_table_size = mie_cdf.size();
const int mie_cdf_table_size = mie_cdf.size();
const int mie_phase_table_size = mie_phase_ang.size();

ray_tracer_kernel_bw<<<grid, block, nbg*sizeof(Float)+ 2 * sizeof(Float)*mie_table_size>>>(
ray_tracer_kernel_bw<<<grid, block, nbg*sizeof(Float)+ sizeof(Float)*(mie_cdf_table_size+mie_phase_table_size)>>>(
igpt-1,
photons_per_pixel, k_null_grid.ptr(),
camera_count.ptr(),
Expand All @@ -640,7 +644,8 @@ void Raytracer_bw::trace_rays_bb(
sun_direction, camera, nbg,
mie_cdf.ptr(), mie_ang.ptr(),
mie_phase.ptr(), mie_phase_ang.ptr(),
mie_table_size);
mie_cdf_table_size,
mie_phase_table_size);

//// convert counts to fluxes
const int block_cam_x = 8;
Expand Down Expand Up @@ -676,7 +681,7 @@ void Raytracer_bw::accumulate_clouds(
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, liwp_cam.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, tauc_cam.ptr());
Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(camera.nx, camera.ny, dist_cam.ptr());

const int n_pix = camera.nx * camera.ny;
const int n_block = std::min(n_pix, 512);
const int n_grid = std::ceil(Float(n_pix)/n_block);
Expand All @@ -686,7 +691,7 @@ void Raytracer_bw::accumulate_clouds(

// domain sizes
const Vector<Float> grid_size = grid_d * grid_cells;

accumulate_clouds_kernel<<<grid, block>>>(
lwp.ptr(),
iwp.ptr(),
Expand All @@ -703,12 +708,12 @@ void Raytracer_bw::accumulate_clouds(
}











64 changes: 34 additions & 30 deletions src_kernels_cuda_rt/raytracer_kernels_bw.cu
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ namespace
atomicAdd(&camera_count[ij_cam], weight * trans_sun);
}
atomicAdd(&camera_shot[ij_cam], Float(1.));

}

__device__
Expand Down Expand Up @@ -298,19 +298,23 @@ void ray_tracer_kernel_bw(
const Float* __restrict__ mie_ang,
const Float* __restrict__ mie_phase,
const Float* __restrict__ mie_phase_ang,
const int mie_table_size)
const int mie_cdf_table_size,
const int mie_phase_table_size)
{
extern __shared__ Float shared_arrays[];
Float* mie_cdf_shared = &shared_arrays[0];
Float* mie_phase_ang_shared = &shared_arrays[mie_table_size];
Float* bg_tau_cum = &shared_arrays[2*mie_table_size];
Float* mie_phase_ang_shared = &shared_arrays[mie_cdf_table_size];
Float* bg_tau_cum = &shared_arrays[mie_phase_table_size+mie_cdf_table_size];
if (threadIdx.x==0)
{
if (mie_table_size > 0)
if (mie_cdf_table_size > 0)
{
for (int mie_i=0; mie_i<mie_table_size; ++mie_i)
for (int mie_i=0; mie_i<mie_cdf_table_size; ++mie_i)
{
mie_cdf_shared[mie_i] = mie_cdf[mie_i];
}
for (int mie_i=0; mie_i<mie_phase_table_size; ++mie_i)
{
mie_phase_ang_shared[mie_i] = mie_phase_ang[mie_i];
}
}
Expand All @@ -324,11 +328,11 @@ void ray_tracer_kernel_bw(
}

__syncthreads();

Vector<Float> surface_normal = {0, 0, 1};

const int n = blockDim.x * blockIdx.x + threadIdx.x;

const Float bg_transmissivity = exp(-bg_tau_cum[0]);

const Vector<Float> kn_grid_d = grid_size / kn_grid;
Expand All @@ -338,7 +342,7 @@ void ray_tracer_kernel_bw(

const Float s_min = max(max(grid_size.z, grid_size.x), grid_size.y) * Float_epsilon;
const Float s_min_bg = max(max(grid_size.x, grid_size.y), z_top) * Float_epsilon;

while (counter[0] < camera.npix*photons_per_pixel)
{
const Int count = atomicAdd(&counter[0], 1);
Expand Down Expand Up @@ -677,19 +681,19 @@ void ray_tracer_kernel_bw(
break;
}
const Float cos_scat = scatter_type == 0 ? rayleigh(rng()) : // gases -> rayleigh,
1 ? ( (mie_table_size > 0) //clouds: Mie or HG
? cos( mie_sample_angle(mie_cdf_shared, mie_ang, rng(), r_eff[ijk], mie_table_size) )
1 ? ( (mie_cdf_table_size > 0) //clouds: Mie or HG
? cos( mie_sample_angle(mie_cdf_shared, mie_ang, rng(), r_eff[ijk], mie_cdf_table_size) )
: henyey(g, rng()))
: henyey(g, rng()); //aerosols
const Float sin_scat = max(Float(0.), sqrt(Float(1.) - cos_scat*cos_scat + Float_epsilon));

// SUN SCATTERING GOES HERE
const Phase_kind kind = scatter_type == 0 ? Phase_kind::Rayleigh :
1 ? (mie_table_size > 0)
1 ? (mie_phase_table_size > 0)
? Phase_kind::Mie
: Phase_kind::HG
: Phase_kind::HG;
const Float p_sun = probability_from_sun(photon, sun_direction, sun_solid_angle, g, mie_phase_ang_shared, mie_phase, r_eff[ijk], mie_table_size,
const Float p_sun = probability_from_sun(photon, sun_direction, sun_solid_angle, g, mie_phase_ang_shared, mie_phase, r_eff[ijk], mie_phase_table_size,
surface_normal, kind);
const Float trans_sun = transmission_direct_sun(photon,n,rng,sun_direction,
k_null_grid,k_ext,
Expand Down Expand Up @@ -736,23 +740,23 @@ void ray_tracer_kernel_bw(
}
__global__
void accumulate_clouds_kernel(
const Float* __restrict__ lwp,
const Float* __restrict__ iwp,
const Float* __restrict__ tau_cloud,
const Float* __restrict__ lwp,
const Float* __restrict__ iwp,
const Float* __restrict__ tau_cloud,
const Vector<Float> grid_d,
const Vector<Float> grid_size,
const Vector<int> grid_cells,
Float* __restrict__ liwp_cam,
Float* __restrict__ tauc_cam,
Float* __restrict__ dist_cam,
Float* __restrict__ zen_cam,
Float* __restrict__ liwp_cam,
Float* __restrict__ tauc_cam,
Float* __restrict__ dist_cam,
Float* __restrict__ zen_cam,
const Camera camera)
{
const int pix = blockDim.x * blockIdx.x + threadIdx.x;
const Float s_eps = max(max(grid_size.z, grid_size.x), grid_size.y) * Float_epsilon;
Vector<Float> direction;
Vector<Float> direction;
Vector<Float> position;

if (pix < camera.nx * camera.ny)
{
Float liwp_sum = 0;
Expand All @@ -778,13 +782,13 @@ void ray_tracer_kernel_bw(
{
direction = normalize(camera.cam_width * (Float(2.)*i-Float(1.0)) + camera.cam_height * (Float(2.)*j-Float(1.0)) + camera.cam_depth);
}

// first bring photon to top of dynamical domain
if ((position.z >= (grid_size.z - s_eps)) && (direction.z < Float(0.)))
{
const Float s = abs((position.z - grid_size.z)/direction.z);
position = position + direction * s - s_eps;

// Cyclic boundary condition in x.
position.x = fmod(position.x, grid_size.x);
if (position.x < Float(0.))
Expand All @@ -802,26 +806,26 @@ void ray_tracer_kernel_bw(
const int j = float_to_int(position.y, grid_d.y, grid_cells.y);
const int k = float_to_int(position.z, grid_d.z, grid_cells.z);
const int ijk = i + j*grid_cells.x + k*grid_cells.x*grid_cells.y;

const Float sx = abs((direction.x > 0) ? ((i+1) * grid_d.x - position.x)/direction.x : (i*grid_d.x - position.x)/direction.x);
const Float sy = abs((direction.y > 0) ? ((j+1) * grid_d.y - position.y)/direction.y : (j*grid_d.y - position.y)/direction.y);
const Float sz = abs((direction.z > 0) ? ((k+1) * grid_d.z - position.z)/direction.z : (k*grid_d.z - position.z)/direction.z);
const Float s_min = min(sx, min(sy, sz));

liwp_sum += s_min * (lwp[ijk] + iwp[ijk]);
tauc_sum += s_min * tau_cloud[ijk];
if (!reached_cloud)
{
dist += s_min;
reached_cloud = tau_cloud[ijk] > 0;
}

position = position + direction * s_min;

position.x += direction.x >= 0 ? s_eps : -s_eps;
position.y += direction.y >= 0 ? s_eps : -s_eps;
position.z += direction.z >= 0 ? s_eps : -s_eps;

// Cyclic boundary condition in x.
position.x = fmod(position.x, grid_size.x);
if (position.x < Float(0.))
Expand All @@ -833,7 +837,7 @@ void ray_tracer_kernel_bw(
position.y += grid_size.y;

}

// divide out initial layer thicknes, equivalent to first converting lwp (g/m2) to lwc (g/m3) or optical depth to k_ext(1/m)
liwp_cam[pix] = liwp_sum / grid_d.z;
tauc_cam[pix] = tauc_sum / grid_d.z;
Expand Down
24 changes: 14 additions & 10 deletions src_test/Radiation_solver_bw.cu
Original file line number Diff line number Diff line change
Expand Up @@ -747,9 +747,10 @@ void Radiation_solver_shortwave::load_mie_tables(
const int n_bnd_sw = this->get_n_bnd_gpu();
const int n_re = mie_nc.get_dimension_size("r_eff");
const int n_mie = mie_nc.get_dimension_size("n_ang");
const int n_mie_cdf = mie_nc.get_dimension_size("n_ang_cdf");

Array<Float,3> mie_cdf(mie_nc.get_variable<Float>("phase_cdf", {n_bnd_sw, n_mie}), {n_mie, 1, n_bnd_sw});
Array<Float,4> mie_ang(mie_nc.get_variable<Float>("phase_cdf_angle", {n_bnd_sw, n_re, n_mie}), {n_mie, n_re, 1, n_bnd_sw});
Array<Float,3> mie_cdf(mie_nc.get_variable<Float>("phase_cdf", {n_bnd_sw, n_mie_cdf}), {n_mie_cdf, 1, n_bnd_sw});
Array<Float,4> mie_ang(mie_nc.get_variable<Float>("phase_cdf_angle", {n_bnd_sw, n_re, n_mie_cdf}), {n_mie_cdf, n_re, 1, n_bnd_sw});

Array<Float,4> mie_phase(mie_nc.get_variable<Float>("phase", {n_bnd_sw, n_re, n_mie}), {n_mie, n_re, 1, n_bnd_sw});
Array<Float,1> mie_phase_ang(mie_nc.get_variable<Float>("phase_angle", {n_mie}), {n_mie});
Expand All @@ -767,10 +768,11 @@ void Radiation_solver_shortwave::load_mie_tables(
const int n_bnd_sw = this->get_n_bnd_gpu();
const int n_re = mie_nc.get_dimension_size("r_eff");
const int n_mie = mie_nc.get_dimension_size("n_ang");
const int n_mie_cdf = mie_nc.get_dimension_size("n_ang_cdf");
const int n_sub = mie_nc.get_dimension_size("sub_band");

Array<Float,3> mie_cdf(mie_nc.get_variable<Float>("phase_cdf", {n_bnd_sw, n_sub, n_mie}), {n_mie, n_sub, n_bnd_sw});
Array<Float,4> mie_ang(mie_nc.get_variable<Float>("phase_cdf_angle", {n_bnd_sw, n_sub, n_re, n_mie}), {n_mie, n_re, n_sub, n_bnd_sw});
Array<Float,3> mie_cdf(mie_nc.get_variable<Float>("phase_cdf", {n_bnd_sw, n_sub, n_mie_cdf}), {n_mie_cdf, n_sub, n_bnd_sw});
Array<Float,4> mie_ang(mie_nc.get_variable<Float>("phase_cdf_angle", {n_bnd_sw, n_sub, n_re, n_mie_cdf}), {n_mie_cdf, n_re, n_sub, n_bnd_sw});

Array<Float,4> mie_phase(mie_nc.get_variable<Float>("phase", {n_bnd_sw, n_sub, n_re, n_mie}), {n_mie, n_re, n_sub, n_bnd_sw});
Array<Float,1> mie_phase_ang(mie_nc.get_variable<Float>("phase_angle", {n_mie}), {n_mie});
Expand Down Expand Up @@ -827,7 +829,8 @@ void Radiation_solver_shortwave::solve_gpu(
const int n_gpt = this->kdist_gpu->get_ngpt();
const int n_bnd = this->kdist_gpu->get_nband();

const int n_mie = (switch_cloud_mie) ? this->mie_angs.dim(1) : 0;
const int n_mie = (switch_cloud_mie) ? this->mie_phase_angs.dim(1) : 0;
const int n_mie_cdf = (switch_cloud_mie) ? this->mie_angs.dim(1) : 0;
const int n_re = (switch_cloud_mie) ? this->mie_angs.dim(2) : 0;
const int n_sub = (switch_cloud_mie) ? this->mie_angs.dim(3) : 3;

Expand Down Expand Up @@ -1028,8 +1031,8 @@ void Radiation_solver_shortwave::solve_gpu(

if (switch_cloud_mie)
{
mie_cdfs_sub = mie_cdfs.subset({{ {1, n_mie}, {iwv+1,iwv+1}, {band, band} }});
mie_angs_sub = mie_angs.subset({{ {1, n_mie}, {1, n_re}, {iwv+1,iwv+1}, {band, band} }});
mie_cdfs_sub = mie_cdfs.subset({{ {1, n_mie_cdf}, {iwv+1,iwv+1}, {band, band} }});
mie_angs_sub = mie_angs.subset({{ {1, n_mie_cdf}, {1, n_re}, {iwv+1,iwv+1}, {band, band} }});
mie_phase_sub = mie_phase.subset({{ {1, n_mie}, {1, n_re}, {iwv+1,iwv+1}, {band, band} }});
}

Expand Down Expand Up @@ -1137,7 +1140,8 @@ void Radiation_solver_shortwave::solve_gpu_bb(
const int n_gpt = this->kdist_gpu->get_ngpt();
const int n_bnd = this->kdist_gpu->get_nband();

const int n_mie = (switch_cloud_mie) ? this->mie_angs_bb.dim(1) : 0;
const int n_mie = (switch_cloud_mie) ? this->mie_phase_angs_bb.dim(1) : 0;
const int n_mie_cdf = (switch_cloud_mie) ? this->mie_angs_bb.dim(1) : 0;
const int n_re = (switch_cloud_mie) ? this->mie_angs_bb.dim(2) : 0;

const int cam_nx = radiance.dim(1);
Expand Down Expand Up @@ -1296,8 +1300,8 @@ void Radiation_solver_shortwave::solve_gpu_bb(

if (switch_cloud_mie)
{
mie_cdfs_sub = mie_cdfs_bb.subset({{ {1, n_mie}, {1,1}, {band, band} }});
mie_angs_sub = mie_angs_bb.subset({{ {1, n_mie}, {1, n_re}, {1,1}, {band, band} }});
mie_cdfs_sub = mie_cdfs_bb.subset({{ {1, n_mie_cdf}, {1,1}, {band, band} }});
mie_angs_sub = mie_angs_bb.subset({{ {1, n_mie_cdf}, {1, n_re}, {1,1}, {band, band} }});
mie_phase_sub = mie_phase_bb.subset({{ {1, n_mie}, {1, n_re}, {1,1}, {band, band} }});
}

Expand Down
Loading

0 comments on commit 626fe30

Please sign in to comment.