Skip to content

Commit

Permalink
added test_loops_omp, a test code with nested openMP (different insta…
Browse files Browse the repository at this point in the history
…nces of CLASS running in parallel, each of them with different threads)
  • Loading branch information
Julien Lesgourgues committed Mar 17, 2016
1 parent a0c60a5 commit aba9f5e
Show file tree
Hide file tree
Showing 4 changed files with 322 additions and 6 deletions.
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ CLASS = class.o

TEST_LOOPS = test_loops.o

TEST_LOOPS_OMP = test_loops_omp.o

TEST_DEGENERACY = test_degeneracy.o

TEST_TRANSFER = test_transfer.o
Expand Down Expand Up @@ -156,6 +158,9 @@ test_sigma: $(TOOLS) $(SOURCE) $(EXTERNAL) $(OUTPUT) $(TEST_SIGMA)
test_loops: $(TOOLS) $(SOURCE) $(EXTERNAL) $(OUTPUT) $(TEST_LOOPS)
$(CC) $(OPTFLAG) $(OMPFLAG) $(LDFLAG) -o $@ $(addprefix build/,$(notdir $^)) -lm

test_loops_omp: $(TOOLS) $(SOURCE) $(EXTERNAL) $(OUTPUT) $(TEST_LOOPS_OMP)
$(CC) $(OPTFLAG) $(OMPFLAG) $(LDFLAG) -o $@ $(addprefix build/,$(notdir $^)) -lm

test_stephane: $(TOOLS) $(SOURCE) $(EXTERNAL) $(OUTPUT) $(TEST_STEPHANE)
$(CC) $(OPTFLAG) $(OMPFLAG) $(LDFLAG) -o $@ $(addprefix build/,$(notdir $^)) -lm

Expand Down
4 changes: 2 additions & 2 deletions include/nonlinear.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ struct nonlinear {

//@{

enum non_linear_method method; /**< $$$ definition missing $$$ */
enum non_linear_method method; /**< method for computing non-linear corrections (none, Halogit, etc.) */

//@}

Expand All @@ -40,7 +40,7 @@ struct nonlinear {
double * tau; /**< tau[index_tau] = list of time values */

double * nl_corr_density; /**< nl_corr_density[index_tau * ppt->k_size + index_k] */
double * k_nl; /**< $$$ definition missing $$$ */
double * k_nl; /**< wavenumber at which non-linear corrections become important, defined differently by different non_linear_method's */

//@}

Expand Down
8 changes: 4 additions & 4 deletions include/output.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ struct output {

//@{

short write_header; /**< $$$ definition missing $$$ */
short write_header; /**< flag stating whether we should write a header in output files */

enum file_format output_format; /**< $$$ definition missing $$$ */
enum file_format output_format; /**< which format for output files (definitions, order of columns, etc.) */

short write_background; /**< $$$ definition missing $$$ */
short write_thermodynamics; /**< $$$ definition missing $$$ */
Expand All @@ -63,7 +63,7 @@ struct output {
};

/*************************************************************************************************************/
/* @cond INCLUDE_WITH_DOXYGEN */
/* @cond INCLUDE_WITH_DOXYGEN */
/*
* Boilerplate for C++
*/
Expand Down Expand Up @@ -199,4 +199,4 @@ extern "C" {
#endif

#endif
/* @endcond */
/* @endcond */
311 changes: 311 additions & 0 deletions test/test_loops_omp.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,311 @@
/** @file class.c
* Julien Lesgourgues, 17.04.2011
*/

/* this main calls CLASS several times in a loop, with different input
parameters. It illustrates how the code could be interfaced with a
parameter extraction code. */

/* JL 17.03.2016: implemented here nested openMP loops. The user
chooses how many instances of CLASS are run in parallel (by setting
the variable number_of_class_instances below). Each of them uses a
number of thread such that all cores are used. */

#include "class.h"

int class(
struct file_content *pfc,
struct precision * ppr,
struct background * pba,
struct thermo * pth,
struct perturbs * ppt,
struct primordial * ppm,
struct nonlinear * pnl,
struct transfers * ptr,
struct spectra * psp,
struct lensing * ple,
struct output * pop,
int l_max,
double ** cl,
ErrorMsg errmsg) {

int l;

class_call(input_init(pfc,ppr,pba,pth,ppt,ptr,ppm,psp,pnl,ple,pop,errmsg),
errmsg,
errmsg);

class_call(background_init(ppr,pba),
pba->error_message,
errmsg);

class_call(thermodynamics_init(ppr,pba,pth),
pth->error_message,
errmsg);

class_call(perturb_init(ppr,pba,pth,ppt),
ppt->error_message,
errmsg);

class_call(primordial_init(ppr,ppt,ppm),
ppm->error_message,
errmsg);

class_call(nonlinear_init(ppr,pba,pth,ppt,ppm,pnl),
pnl->error_message,
errmsg);

class_call(transfer_init(ppr,pba,pth,ppt,pnl,ptr),
ptr->error_message,
errmsg);

class_call(spectra_init(ppr,pba,ppt,ppm,pnl,ptr,psp),
psp->error_message,
errmsg);

class_call(lensing_init(ppr,ppt,psp,pnl,ple),
ple->error_message,
errmsg);

/****** write the Cl values in the input array cl[l] *******/

for (l=2; l <= l_max; l++) {

class_call(output_total_cl_at_l(psp,ple,pop,(double)l,cl[l]),
psp->error_message,
errmsg);
}

/****** all calculations done, now free the structures ******/

class_call(lensing_free(ple),
ple->error_message,
errmsg);

class_call(spectra_free(psp),
psp->error_message,
errmsg);

class_call(transfer_free(ptr),
ptr->error_message,
errmsg);

class_call(nonlinear_free(pnl),
pnl->error_message,
errmsg);

class_call(primordial_free(ppm),
ppm->error_message,
errmsg);

class_call(perturb_free(ppt),
ppt->error_message,
errmsg);

class_call(thermodynamics_free(pth),
pth->error_message,
errmsg);

class_call(background_free(pba),
pba->error_message,
errmsg);

return _SUCCESS_;

}

int main() {

/* shared variable that will be common to all CLASS instances */
int i;
int l,l_max;
int num_ct_max=7;
int num_loops=10;

struct file_content fc;
ErrorMsg errmsg_parser;

int total_number_of_threads;
int number_of_class_instances;
int number_of_threads_inside_class;

int index_ct_tt;
int index_ct_ee;
int index_ct_te;

/* dealing with the openMP part (number of instances, number of
threads per instance...) */

#ifdef _OPENMP

/* Determine the number of threads, class instances and nested
threads */
total_number_of_threads = omp_get_max_threads();

/* User-fixed number of CLASS instances to be run in parallel (Total
number of threads should be dividable by this number) */
number_of_class_instances=2;

if ((total_number_of_threads % number_of_class_instances) != 0)
printf("The total number of threads, %d, is not a mutiple of the requested number of CLASS instances, %d\n",total_number_of_threads,number_of_class_instances);
number_of_threads_inside_class = total_number_of_threads/number_of_class_instances;

/* inferred number of threads per instance */
printf("# Total number of available threads = %d, used to run\n",
total_number_of_threads);
printf("# -> %d CLASS executables in parallel\n",
number_of_class_instances);
printf("# -> %d threads inside each CLASS executables\n",
number_of_threads_inside_class);

/* Turn on nested parallelism */
omp_set_nested(1);
#endif

/* choose a value of l_max in C_l's */
l_max=3000;

/* all parameters for which we don't want to keep default values
should be passed to the code through a file_content
structure. Create such a structure with the size you need: 10 in
this exemple */
parser_init(&fc,10,"",errmsg_parser);

/* assign values to these 9 parameters. Some will be fixed, some
will be varied in the loop. */
strcpy(fc.name[0],"output");
strcpy(fc.value[0],"tCl,pCl,lCl");

strcpy(fc.name[1],"l_max_scalars");
sprintf(fc.value[1],"%d",l_max);

strcpy(fc.name[2],"lensing");
sprintf(fc.value[2],"yes");

strcpy(fc.name[3],"H0");
sprintf(fc.value[3],"%e",72.);

strcpy(fc.name[4],"omega_b");
sprintf(fc.value[4],"%e",0.024);

strcpy(fc.name[5],"omega_cdm");
sprintf(fc.value[5],"%e",0.05);

strcpy(fc.name[6],"z_reio");
sprintf(fc.value[6],"%e",10.);

strcpy(fc.name[7],"A_s");
sprintf(fc.value[7],"%e",2.3e-9);

strcpy(fc.name[8],"n_s");
sprintf(fc.value[8],"%e",1.);

strcpy(fc.name[9],"perturbations_verbose");
sprintf(fc.value[9],"%d",0); // Trick: set to 2 to cross-check actual number of threads per CLASS instance

/* Create an array of Cl's where all results will be stored for each parameter value in the loop */
double *** cl;
cl = malloc(num_loops*sizeof(double**));

/* Create one thread for each instance of CLASS */
#pragma omp parallel num_threads(number_of_class_instances)
{

/* set the number of threads inside each CLASS instance */
#ifdef _OPENMP
omp_set_num_threads(number_of_threads_inside_class);
#endif

/* for each thread/instance, create all CLASS input/output
structures (these variables are being declared insode the
parallel zone, hence they are openMP private variables) */

struct precision pr; /* for precision parameters */
struct background ba; /* for cosmological background */
struct thermo th; /* for thermodynamics */
struct perturbs pt; /* for source functions */
struct transfers tr; /* for transfer functions */
struct primordial pm; /* for primordial spectra */
struct spectra sp; /* for output spectra */
struct nonlinear nl; /* for non-linear spectra */
struct lensing le; /* for lensed spectra */
struct output op; /* for output files */
ErrorMsg errmsg; /* for error messages */

struct file_content fc_local;
int j,iam;

/* copy the shared file content into the local file content used by each instance */
parser_init(&fc_local,fc.size,"",errmsg);
for (j=0; j < fc.size; j++) {
strcpy(fc_local.value[j],fc.value[j]);
strcpy(fc_local.name[j],fc.name[j]);
fc_local.read[j]=fc.read[j];
}

/* loop over (num_loops) values of some parameters: in this exemple, omega_b */
#pragma omp for schedule(static,1)
for (i=0; i<=num_loops; i++) {

#ifdef _OPENMP
iam=omp_get_thread_num();
#else
iam=0;
#endif

/* assign one value to omega_b */
sprintf(fc_local.value[4],"%e",0.01+i*0.002);

printf("# %d\tthread=%d : running with omega_b = %s\n",i,iam,fc_local.value[4]);

/* allocate the array where the Cl's calculated by one instance
will be written (we could add another array with P(k), or
extract other results from the code - here we assume that we
are interested in the C_l's only */
cl[i]=malloc((l_max+1)*sizeof(double*));
for (l=0;l<=l_max;l++)
cl[i][l]=malloc(num_ct_max*sizeof(double));

/* calls class and return the C_l's*/
if (class(&fc_local,&pr,&ba,&th,&pt,&pm,&nl,&tr,&sp,&le,&op,l_max,cl[i],errmsg) == _FAILURE_) {
printf("\n\nError in class \n=>%s\n",errmsg);
//return _FAILURE_;
}

/* if this is the first call, extract dynamically the value of indices used in the output */
if ((i==0) && (iam==0)) {
index_ct_tt=sp.index_ct_tt;
index_ct_te=sp.index_ct_te;
index_ct_ee=sp.index_ct_ee;
}

} // end of loop over parameters
} // end parallel zone

/* write in file the lensed C_l^TT, C_l^EE, C_l^TE's obtained in all runs */

FILE * out=fopen("output/test_loops_omp.dat","w");

for (i=0; i<num_loops; i++) {
for (l=2;l<=l_max;l++) {
fprintf(out,"%d %e %e %e\n",
l,
l*(l+1)*cl[i][l][index_ct_tt],
l*(l+1)*cl[i][l][index_ct_ee],
l*(l+1)*cl[i][l][index_ct_te]);
}
fprintf(out,"\n");
}

/* free Cl's array */
for (i=0; i<num_loops; i++) {
for (l=0;l<l_max;l++) {
free(cl[i][l]);
}
free(cl[i]);
}
free(cl);

return _SUCCESS_;

}

0 comments on commit aba9f5e

Please sign in to comment.