Make corrections to fix compilation errors.
alserene committed Nov 8, 2024
1 parent a374af3 commit 8e62698
Showing 1 changed file with 20 additions and 19 deletions.
39 changes: 20 additions & 19 deletions src/py21cmfast/src/SpinTemperatureBox.cu
@@ -27,21 +27,19 @@
 #include "dft.h"
 #include "filtering.h"
 #include "thermochem.h"
+#include "interpolation.h"
 
 #include "SpinTemperatureBox.h"
 
 
-__device__ inline double EvaluateRGTable1D_f_gpu(double x, RGTable1D_f *table) {
+__device__ inline double EvaluateRGTable1D_f_gpu(double x, double *x_min, double *x_width, double *y_arr) {
 
-    double x_min = table->x_min;
-    double x_width = table->x_width;
+    int idx = (int)floor((x - *x_min) / *x_width);
 
-    int idx = (int)floor((x - x_min) / x_width);
+    double table_val = *x_min + *x_width * (float)idx;
+    double interp_point = (x - table_val) / *x_width;
 
-    double table_val = x_min + x_width * (float)idx;
-    double interp_point = (x - table_val) / x_width;
-
-    return table->y_arr[idx] * (1 - interp_point) + table->y_arr[idx + 1] * (interp_point);
+    return y_arr[idx] * (1 - interp_point) + y_arr[idx + 1] * (interp_point);
 }
 
 template <unsigned int threadsPerBlock>
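The rewritten routine is a standard linear interpolation on a uniformly spaced table. A quick worked example, with illustrative values that are not from the codebase:

    // Illustrative values only: *x_min = 0.0, *x_width = 0.5, x = 1.3
    // idx          = (int)floor((1.3 - 0.0) / 0.5) = 2
    // table_val    = 0.0 + 0.5 * 2                 = 1.0
    // interp_point = (1.3 - 1.0) / 0.5             = 0.6
    // result       = y_arr[2] * (1 - 0.6) + y_arr[3] * 0.6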
@@ -58,7 +56,9 @@ __device__ void warp_reduce(volatile double *sdata, unsigned int tid) {
 
 template <unsigned int threadsPerBlock>
 __global__ void compute_and_reduce(
-    RGTable1D_f *SFRD_conditional_table,  // input data
+    double *x_min,                        // input data
+    double *x_width,                      // input data
+    double *y_arr,                        // input data
     float *dens_R_grid,                   // input data
     double zpp_growth_R_ct,               // input value
     float *sfrd_grid,                     // star formation rate density grid to be updated
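Passing the table as three plain device pointers means the kernel never dereferences a struct whose members (such as RGTable1D_f's y_arr) point at host memory. A minimal host-side staging sketch, assuming a hypothetical helper name, a table length n_bin, and a float-to-double widening of the y-values; this is not the repository's actual setup:

    #include <vector>
    #include <cuda_runtime.h>
    #include <thrust/device_ptr.h>

    // Hypothetical helper (not in the repository): copy an RGTable1D_f's
    // contents into plain device arrays before launching the kernel.
    void stage_table_on_device(const RGTable1D_f *table, int n_bin,
                               double **x_min, double **x_width, double **y_arr) {
        double h_x_min = table->x_min;
        double h_x_width = table->x_width;
        std::vector<double> h_y(table->y_arr, table->y_arr + n_bin);  // widen float -> double (assumption)
        cudaMalloc(x_min, sizeof(double));
        cudaMalloc(x_width, sizeof(double));
        cudaMalloc(y_arr, sizeof(double) * n_bin);
        cudaMemcpy(*x_min, &h_x_min, sizeof(double), cudaMemcpyHostToDevice);
        cudaMemcpy(*x_width, &h_x_width, sizeof(double), cudaMemcpyHostToDevice);
        cudaMemcpy(*y_arr, h_y.data(), sizeof(double) * n_bin, cudaMemcpyHostToDevice);
    }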
@@ -89,8 +89,8 @@ __global__ void compute_and_reduce(
         curr_dens_j = dens_R_grid[i + threadsPerBlock] * zpp_growth_R_ct;
 
         // Compute fraction of mass that has collapsed to form stars/other structures
-        fcoll_i = exp(EvaluateRGTable1D_f_gpu(curr_dens_i, &SFRD_conditional_table));
-        fcoll_j = exp(EvaluateRGTable1D_f_gpu(curr_dens_j, &SFRD_conditional_table));
+        fcoll_i = exp(EvaluateRGTable1D_f_gpu(curr_dens_i, x_min, x_width, y_arr));
+        fcoll_j = exp(EvaluateRGTable1D_f_gpu(curr_dens_j, x_min, x_width, y_arr));
 
         // Update the shared buffer with the collapse fractions
         sdata[tid] += fcoll_i + fcoll_j;
@@ -109,7 +109,7 @@ __global__ void compute_and_reduce(
     if (threadsPerBlock >= 128) { if (tid < 64) { sdata[tid] += sdata[tid + 64]; } __syncthreads(); }
 
     // Final warp-level reduction
-    if (tid < 32) warp_reduce(sdata, tid);
+    if (tid < 32) warp_reduce<threadsPerBlock>(sdata, tid);
 
     // The first thread of each block updates the block totals
     if (tid == 0) ave_sfrd_buf[blockIdx.x] = sdata[0];
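The explicit template argument is the fix here: warp_reduce's threadsPerBlock parameter appears nowhere in its function parameter list, so the compiler cannot deduce it from the call, and the bare call fails to compile. A minimal illustration with hypothetical names:

    template <unsigned int N>
    __device__ void reduce_tail(volatile double *sdata, unsigned int tid);

    // reduce_tail(sdata, tid);      // error: N is not deducible from the arguments
    reduce_tail<128>(sdata, tid);    // OK: N supplied explicitly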
@@ -190,23 +190,23 @@ void calculate_sfrd_from_grid_gpu(
     // Invoke kernel
     switch (threadsPerBlock) {
         case 512:
-            compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels);
+            compute_and_reduce<512><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels);
             break;
         case 256:
-            compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels);
+            compute_and_reduce<256><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels);
             break;
         case 128:
-            compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels);
+            compute_and_reduce<128><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels);
             break;
         case 64:
-            compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels);
+            compute_and_reduce<64><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels);
             break;
         case 32:
-            compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels);
+            compute_and_reduce<32><<< numBlocks, threadsPerBlock, smemSize >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels);
             break;
         default:
             // LOG_WARNING("Thread size invalid; defaulting to 256.")
-            compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(&x_min, &x_width, &y_arr, &d_dens_R_grid, zpp_growth_R_ct, &d_sfrd_grid, &d_ave_sfrd_buf, num_pixels);
+            compute_and_reduce<256><<< numBlocks, 256, 256 * sizeof(double) >>>(x_min, x_width, y_arr, d_dens_R_grid, zpp_growth_R_ct, d_sfrd_grid, d_ave_sfrd_buf, num_pixels);
     }
 
     // Only use during development!
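The launch-argument change above is the core compile fix in this hunk: x_min, x_width, y_arr, d_dens_R_grid, d_sfrd_grid, and d_ave_sfrd_buf are already pointers, so taking their addresses produced double**/float** arguments where the kernel expects double*/float*. A minimal illustration of the mismatch, with hypothetical names:

    __global__ void k(double *p);

    double *d_p;              // device pointer held in a host variable
    // k<<<1, 1>>>(&d_p);     // error: argument of type double** where double* is expected
    k<<<1, 1>>>(d_p);         // OK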
@@ -224,7 +224,8 @@ void calculate_sfrd_from_grid_gpu(
     // Wrap device pointer in a thrust::device_ptr
     thrust::device_ptr<double> d_ave_sfrd_buf_ptr(d_ave_sfrd_buf);
     // Reduce final buffer values to one value
-    ave_sfrd_buf = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + buffer_length, 0., thrust::plus<double>());
+    double reduced_value = thrust::reduce(d_ave_sfrd_buf_ptr, d_ave_sfrd_buf_ptr + buffer_length, 0., thrust::plus<double>());
+    *ave_sfrd_buf = reduced_value;
 
     // Copy results from device to host.
     err = cudaMemcpy(sfrd_grid, d_sfrd_grid, sizeof(float) * num_pixels, cudaMemcpyDeviceToHost);
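thrust::reduce returns the reduction result by value; the old line assigned that double directly to ave_sfrd_buf, which, as the fixed code shows, is a pointer-typed out-parameter, hence the type error. A minimal sketch of the corrected pattern, with hypothetical names:

    #include <thrust/reduce.h>
    #include <thrust/functional.h>
    #include <thrust/device_ptr.h>

    // Sketch: store the by-value sum through the double* out-parameter.
    void store_sum(thrust::device_ptr<double> first, int n, double *out) {
        *out = thrust::reduce(first, first + n, 0., thrust::plus<double>());
    }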
