diff --git a/cudampilib/cudampilib.c b/cudampilib/cudampilib.c
index c79c12e..deb4ddf 100644
--- a/cudampilib/cudampilib.c
+++ b/cudampilib/cudampilib.c
@@ -72,6 +72,7 @@ float __cudampi__globalpowerlimit;
 int powermeasurecounter[__CUDAMPI_MAX_THREAD_COUNT] = {0};
 
 int __cudampi__batch_size;
+int __cudampi__cpu_enabled;
 
 extern struct __cudampi__arguments_type __cudampi__arguments;
 static char doc[] = "Cudampi program";
@@ -507,7 +508,10 @@ void __cudampi__initializeMPI(int argc, char **argv) {
   }
 
   __cudampi__batch_size = __cudampi__arguments.batch_size;
+  __cudampi__cpu_enabled = __cudampi__arguments.cpu_enabled;
   MPI_Bcast(&__cudampi__batch_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&__cudampi__cpu_enabled, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
   MPI_Allgather(&__cudampi__localGpuDeviceCount, 1, MPI_INT, __cudampi__GPUcountspernode, 1, MPI_INT, MPI_COMM_WORLD);
 
@@ -516,13 +520,6 @@ void __cudampi__initializeMPI(int argc, char **argv) {
 
   MPI_Allgather(&__cudampi__localFreeThreadCount, 1, MPI_INT, __cudampi__freeThreadsPerNode, 1, MPI_INT, MPI_COMM_WORLD);
 
-  if (!__cudampi__arguments.cpu_enabled){
-    for (int i=0; i < __cudampi__MPIproccount; i++){
-      __cudampi__freeThreadsPerNode[i] = 0;
-    }
-  }
-
-
   // check if there is a configuration file
   FILE *filep = fopen("__cudampi.conf", "r");
 
diff --git a/cudampilib/cudampislave.c b/cudampilib/cudampislave.c
index 3154402..6053615 100644
--- a/cudampilib/cudampislave.c
+++ b/cudampilib/cudampislave.c
@@ -45,6 +45,7 @@ int __cudampi__localGpuDeviceCount = 1;
 
 int __cudampi__localFreeThreadCount = 0;
 int __cudampi__batch_size;
+int __cudampi__cpu_enabled;
 
 unsigned long cpuStreamsValid[CPU_STREAMS_SUPPORTED];
 
@@ -483,17 +484,23 @@ int main(int argc, char **argv) {
     exit(-1); // we could exit in a nicer way! TBD
   }
 
-  if (cudaSuccess != __cudampi__getCpuFreeThreads(&__cudampi__localFreeThreadCount)) {
-    log_message(LOG_ERROR, "Error invoking __cudampi__getCpuFreeThreads()");
-    exit(-1);
+  MPI_Bcast(&__cudampi__batch_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
+  MPI_Bcast(&__cudampi__cpu_enabled, 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+  if (__cudampi__cpu_enabled){
+    if (cudaSuccess != __cudampi__getCpuFreeThreads(&__cudampi__localFreeThreadCount)) {
+      log_message(LOG_ERROR, "Error invoking __cudampi__getCpuFreeThreads()");
+      exit(-1);
+    }
+  }
+  else {
+    __cudampi__localFreeThreadCount = 0;
   }
 
   MPI_Allgather(&__cudampi__localGpuDeviceCount, 1, MPI_INT, __cudampi__GPUcountspernode, 1, MPI_INT, MPI_COMM_WORLD);
 
   MPI_Allgather(&__cudampi__localFreeThreadCount, 1, MPI_INT, __cudampi__freeThreadsPerNode, 1, MPI_INT, MPI_COMM_WORLD);
 
-  MPI_Bcast(&__cudampi__batch_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
-
   MPI_Bcast(&__cudampi_totaldevicecount, 1, MPI_INT, 0, MPI_COMM_WORLD);
 
   __cudampi_targetMPIrankfordevice = (int *)malloc(__cudampi_totaldevicecount * sizeof(int));