Skip to content

Commit

Permalink
Misc changes during debugging.
Browse files Browse the repository at this point in the history
  • Loading branch information
alserene committed Oct 25, 2024
1 parent b939e36 commit 3943d2d
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 16 deletions.
9 changes: 5 additions & 4 deletions src/py21cmfast/src/PerturbField.c
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ int ComputePerturbField_cpu(

// ************ END INITIALIZATION **************************** //

// Perturbing the density field required adding over multiple cells. Store intermediate result as a double to avoid rounding errors
// Perturbing the density field requires adding over multiple cells. Store intermediate result as a double to avoid rounding errors
if(user_params->PERTURB_ON_HIGH_RES) {
resampled_box = (double *)calloc(TOT_NUM_PIXELS,sizeof(double));
}
Expand Down Expand Up @@ -775,9 +775,10 @@ int ComputePerturbField(
float redshift, UserParams *user_params, CosmoParams *cosmo_params,
InitialConditions *boxes, PerturbedField *perturbed_field
){
if (1) {
ComputePerturbField_gpu(redshift, user_params, cosmo_params, boxes, perturbed_field)
// int result;
if (0) {
return ComputePerturbField_gpu(redshift, user_params, cosmo_params, boxes, perturbed_field);
} else {
ComputePerturbField_cpu(redshift, user_params, cosmo_params, boxes, perturbed_field)
return ComputePerturbField_cpu(redshift, user_params, cosmo_params, boxes, perturbed_field);
}
}
61 changes: 49 additions & 12 deletions src/py21cmfast/src/PerturbField.cu
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ void compute_perturbed_velocities(
LOWRES_density_perturb[0][1] = 0.;
}
}
else{
else {
if(user_params->PERTURB_ON_HIGH_RES) {
// HIRES_density_perturb[C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]*I/k_sq/(TOT_NUM_PIXELS+0.0);
// HIRES_density_perturb[C_INDEX(n_x,n_y,n_z)] *= dDdt_over_D*kvec[axis]/k_sq/(TOT_NUM_PIXELS+0.0);
Expand Down Expand Up @@ -420,8 +420,8 @@ int ComputePerturbField_gpu(
LOWRES_density_perturb_saved = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*HII_KSPACE_NUM_PIXELS);

if(user_params->PERTURB_ON_HIGH_RES) {
HIRES_density_perturb = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*KSPACE_NUM_PIXELS);
HIRES_density_perturb_saved = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*KSPACE_NUM_PIXELS);
HIRES_density_perturb = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*KSPACE_NUM_PIXELS); // D * D * NCF * D/2
HIRES_density_perturb_saved = (fftwf_complex *) fftwf_malloc(sizeof(fftwf_complex)*KSPACE_NUM_PIXELS); // sizeof(fftwf_complex) = 2 * sizeof(float)?
}

// double *resampled_box;
Expand All @@ -439,10 +439,17 @@ int ComputePerturbField_gpu(
#pragma omp parallel shared(growth_factor,boxes,LOWRES_density_perturb,HIRES_density_perturb,dimension) private(i,j,k) num_threads(user_params->N_THREADS)
{
#pragma omp for
for (i=0; i<dimension; i++){
for (j=0; j<dimension; j++){
for (k=0; k<(unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension); k++){
for (i=0; i<dimension; i++){ // D
for (j=0; j<dimension; j++){ // D
for (k=0; k<(unsigned long long)(user_params->NON_CUBIC_FACTOR*dimension); k++){ // NCF * D
// max (i, j, k) = (D, D, NCF * D)
if(user_params->PERTURB_ON_HIGH_RES) {
// HIRES_density_perturb is of type fftwf_complex
// HIRES_density_perturb has size D * D * NCF * D/2

// hires_density is of type float
// hires_density has size D * D * NCF * D

*((float *)HIRES_density_perturb + R_FFT_INDEX(i,j,k)) = growth_factor*boxes->hires_density[R_INDEX(i,j,k)];
}
else {
Expand Down Expand Up @@ -557,15 +564,28 @@ int ComputePerturbField_gpu(
// Allocat host memory for output box
double* resampled_box = (double*)malloc(size);

// Allocate device memory for output box
// Allocate device memory for output box and set to 0.
double* d_box;
cudaMalloc(&d_box, size);
cudaMemset(d_box, 0, size);
cudaMemset(d_box, 0, size); // fills size bytes with byte=0

err = cudaGetLastError();
if (err != cudaSuccess) {
LOG_ERROR("CUDA error: %s", cudaGetErrorString(err));
Throw(CudaError);
}

// Allocate device memory for density field
float* hires_density;
cudaMalloc(&hires_density, (HII_TOT_NUM_PIXELS * sizeof(double)));
cudaMemcpy(hires_density, boxes->hires_density, (HII_TOT_NUM_PIXELS * sizeof(double)), cudaMemcpyHostToDevice);
// cudaMalloc(&hires_density, (HII_TOT_NUM_PIXELS * sizeof(double))); // from outputs.py & indexing.h
cudaMalloc(&hires_density, (TOT_NUM_PIXELS * sizeof(double))); // from outputs.py & indexing.h
cudaMemcpy(hires_density, boxes->hires_density, (TOT_NUM_PIXELS * sizeof(double)), cudaMemcpyHostToDevice);

err = cudaGetLastError();
if (err != cudaSuccess) {
LOG_ERROR("CUDA error: %s", cudaGetErrorString(err));
Throw(CudaError);
}

// Allocate device memory and copy arrays to device as per user_params
float* hires_vx;
Expand Down Expand Up @@ -616,7 +636,13 @@ int ComputePerturbField_gpu(
}
}

// Can't seem to pass macro straight to kernel.
err = cudaGetLastError();
if (err != cudaSuccess) {
LOG_ERROR("CUDA error: %s", cudaGetErrorString(err));
Throw(CudaError);
}

// Seemingly can't pass macro straight to kernel
long long d_para = D_PARA;
long long hii_d = HII_D;
long long hii_d_para = HII_D_PARA;
Expand All @@ -640,7 +666,12 @@ int ComputePerturbField_gpu(
}

// Copy results from device to host
cudaMemcpy(resampled_box, d_box, size, cudaMemcpyDeviceToHost);
// cudaMemcpy(resampled_box, d_box, size, cudaMemcpyDeviceToHost);
cudaError_t err = cudaMemcpy(resampled_box, d_box, size, cudaMemcpyDeviceToHost);
if (err != cudaSuccess) {
LOG_ERROR("CUDA error: %s", cudaGetErrorString(err));
Throw(CudaError);
}

// Deallocate device memory
cudaFree(d_box);
Expand Down Expand Up @@ -669,6 +700,12 @@ int ComputePerturbField_gpu(
}
}

err = cudaGetLastError();
if (err != cudaSuccess) {
LOG_ERROR("CUDA error: %s", cudaGetErrorString(err));
Throw(CudaError);
}

// ----------------------------------------------------------------------------------------------------------------------------

LOG_SUPER_DEBUG("resampled_box: ");
Expand Down

0 comments on commit 3943d2d

Please sign in to comment.