34
34
#include < thrust/device_vector.h>
35
35
#include < thrust/copy.h>
36
36
#include < thrust/iterator/constant_iterator.h>
37
- #else
38
- #define checkCudaErrors (x ) {}
39
37
#endif
40
38
41
39
#define THREADS_PER_BLOCK 64
42
- #define LOG_FILE " spinwalk.log"
43
40
44
41
namespace bl = boost::log;
42
+ using namespace indicators ;
45
43
46
44
bool run (simulation_parameters param, std::map<std::string, std::vector<std::string> > filenames, std::vector<double > fov_scale)
47
45
{
46
+ auto start_config = std::chrono::high_resolution_clock::now ();
47
+ int64_t old_elapsed = 0 ;
48
48
// ========== checking number of GPU(s) ==========
49
49
int32_t device_count=1 ;
50
+ #ifdef __CUDACC__
50
51
checkCudaErrors (cudaGetDeviceCount (&device_count));
51
52
BOOST_LOG_TRIVIAL (info) << " Number of available GPU(s): " << device_count;
52
-
53
+ # endif
53
54
// param.n_spins /= device_count; // spins would be distributed across multiple GPUs (if more than one is available); this assumes n_spins is divisible by device_count
54
55
size_t numBlocks = (param.n_spins + THREADS_PER_BLOCK - 1 ) / THREADS_PER_BLOCK;
55
56
56
57
// ========== allocate memory on CPU ==========
57
- auto st = std::chrono::steady_clock::now ();
58
58
size_t trj = param.enRecordTrajectory ? param.n_timepoints * (param.n_dummy_scan + 1 ) : 1 ;
59
59
size_t len0 = 3 * param.n_spins ;
60
60
size_t len1 = len0 * param.n_fov_scale * trj;
@@ -68,7 +68,6 @@ bool run(simulation_parameters param, std::map<std::string, std::vector<std::str
68
68
std::vector<float > M0 (len0, 0 .f ); // memory layout(row-major): [n_spins x 3]
69
69
std::vector<float > M1 (len2, 0 .f ); // memory layout(row-major): [n_fov_scale x n_spins x n_TE x 3]
70
70
std::vector<uint8_t > T (M1.size ()/3 , 0 ); // memory layout(row-major): [n_fov_scale x n_spins x n_TE x 1]
71
- BOOST_LOG_TRIVIAL (info) << " Memory allocation (CPU) took " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now () - st).count () << " ms" ;
72
71
73
72
// ========== allocate memory on GPU ==========
74
73
#ifdef __CUDACC__
@@ -84,11 +83,6 @@ bool run(simulation_parameters param, std::map<std::string, std::vector<std::str
84
83
checkCudaErrors (cudaMalloc (&d_param, sizeof (simulation_parameters)));
85
84
if (param.fieldmap_exist )
86
85
d_pFieldMap.resize (fieldmap.size ());
87
- // ==========^==========
88
- cudaEvent_t start;
89
- cudaEvent_t end;
90
- checkCudaErrors (cudaEventCreate (&start));
91
- checkCudaErrors (cudaEventCreate (&end));
92
86
#endif
93
87
94
88
for (int16_t fieldmap_no=0 ; fieldmap_no<param.n_fieldmaps ; fieldmap_no++)
@@ -99,9 +93,9 @@ bool run(simulation_parameters param, std::map<std::string, std::vector<std::str
99
93
return false ;
100
94
param.matrix_length = mask.size (); // update the matrix length based on the mask size from the recent read
101
95
#ifdef __CUDACC__
102
- if (d_pMask.size () != mask.size ())
96
+ if (d_pMask.size () != mask.size () && param. no_gpu == false )
103
97
d_pMask.resize (mask.size ());
104
- if (d_pFieldMap.size () != fieldmap.size () && param.fieldmap_exist )
98
+ if (d_pFieldMap.size () != fieldmap.size () && param.fieldmap_exist && param. no_gpu == false )
105
99
d_pFieldMap.resize (fieldmap.size ());
106
100
#endif
107
101
// convert fieldmap from T to degree per timestep
@@ -163,10 +157,8 @@ bool run(simulation_parameters param, std::map<std::string, std::vector<std::str
163
157
d_pFieldMap = fieldmap;
164
158
}
165
159
#endif
166
- // ========== run ==========
167
- checkCudaErrors (cudaEventRecord (start));
168
- // tqdm bar;
169
- using namespace indicators ;
160
+ // ========== run ==========
161
+ auto start_sim = std::chrono::high_resolution_clock::now ();
170
162
ProgressBar bar{option::ShowPercentage{true }, option::Start{" [" }, option::Fill{" =" }, option::Lead{" >" }, option::End{" ]" }};
171
163
simulation_parameters param_local;
172
164
memcpy (¶m_local, ¶m, sizeof (simulation_parameters));
@@ -204,9 +196,9 @@ bool run(simulation_parameters param, std::map<std::string, std::vector<std::str
204
196
T.data () + param.n_TE *param.n_spins *sl,
205
197
spin);});
206
198
}
199
+ #ifdef __CUDACC__
207
200
else
208
201
{
209
- #ifdef __CUDACC__
210
202
BOOST_LOG_TRIVIAL (info) << " GPU " << 1 << " ) Fieldmap " << fieldmap_no << " , simulating sample length scale " << fov_scale[sl];
211
203
checkCudaErrors (cudaMemcpy (d_param, ¶m_local, sizeof (simulation_parameters), cudaMemcpyHostToDevice));
212
204
// scale position to mimic the different volume size
@@ -227,19 +219,19 @@ bool run(simulation_parameters param, std::map<std::string, std::vector<std::str
227
219
thrust::copy (d_XYZ1.begin (), d_XYZ1.end (), XYZ1.begin () + shift);
228
220
shift = param.n_TE *param.n_spins *sl;
229
221
thrust::copy (d_T.begin (), d_T.end (), T.begin () + shift);
230
- #endif
231
222
}
223
+ #endif
232
224
// bar.progress(sl, param.n_fov_scale);
233
225
bar.set_progress (100 * (sl+1 )/float (param.n_fov_scale ));
234
226
}
235
- // bar.finish();
227
+
228
+ auto end_config = std::chrono::high_resolution_clock::now ();
229
+ auto elapsed_sim = std::chrono::duration_cast<std::chrono::milliseconds>(end_config - start_sim).count () / 1000.0 ;
230
+ auto elapsed_config = std::chrono::duration_cast<std::chrono::seconds>(end_config - start_config).count ();
231
+ int precision = elapsed_sim>10 ? 0 : (elapsed_sim > 1 ? 1 : 3 );
232
+ std::cout << " Simulation took " << std::fixed << std::setprecision (precision) << elapsed_sim << " sec., everything else took " << elapsed_config - elapsed_sim - old_elapsed<< " sec.\n " ;
233
+ old_elapsed = elapsed_config;
236
234
237
- float elapsedTime;
238
- checkCudaErrors (cudaEventRecord (end));
239
- checkCudaErrors (cudaEventSynchronize (end));
240
- checkCudaErrors (cudaDeviceSynchronize ());
241
- checkCudaErrors (cudaEventElapsedTime (&elapsedTime, start, end));
242
- std::cout << " Simulation over " << 1 << " GPU(s) took " << std::fixed << std::setprecision (2 ) << elapsedTime/1000 . << " second(s)" << ' \n ' ;
243
235
// ========== save results ==========
244
236
std::cout << " Saving the results to disk." << " \n " ;
245
237
std::string f = filenames.at (" output" )[fieldmap_no];
@@ -262,11 +254,11 @@ bool run(simulation_parameters param, std::map<std::string, std::vector<std::str
262
254
file_utils::save_h5 (f, fov_scale.data (), dims, " scales" , " double" );
263
255
}
264
256
265
- // ========== clean up GPU ==========
257
+ #ifdef __CUDACC__
258
+ // ============ clean up GPU ============
266
259
checkCudaErrors (cudaFree (d_param));
267
- checkCudaErrors (cudaEventDestroy (start));
268
- checkCudaErrors (cudaEventDestroy (end));
269
-
260
+ #endif
261
+
270
262
return true ;
271
263
}
272
264
@@ -294,7 +286,7 @@ int main(int argc, char * argv[])
294
286
bool bStatus = true ;
295
287
std::string phantom_output;
296
288
float phantom_radius, phantom_fov, phantom_dchi, phantom_oxy_level, phantom_orientation, phantom_BVF;
297
- int32_t phantom_resolution, phantom_seed;
289
+ int32_t phantom_resolution, phantom_seed, device_id = 0 ;
298
290
std::vector<std::string> config_files;
299
291
print_logo ();
300
292
// ========== parse command line arguments ==========
@@ -304,7 +296,8 @@ int main(int argc, char * argv[])
304
296
(" help,h" , " help message (this menu)" )
305
297
(" configs,c" , po::value<std::vector<std::string>>(&config_files)->multitoken (), " config. files as many as you want. e.g. -c config1.ini config2.ini ... configN.ini" )
306
298
#ifdef __CUDACC__
307
- (" no_gpu,g" , " only run on CPU(s) (default: GPU)" )
299
+ (" use_cpu,p" , " only run on CPU (default: GPU)" )
300
+ (" device,g" ,po::value<int32_t >(&device_id)->default_value (0 ), " select GPU device (if there are multiple GPUs)" )
308
301
#endif
309
302
(" cylinder,C" , " generate phantom filled with cylinders" )
310
303
(" sphere,S" , " generate phantom filled with spheres" )
@@ -330,7 +323,8 @@ int main(int argc, char * argv[])
330
323
return 1 ;
331
324
}
332
325
333
- auto fileSink = bl::add_file_log (bl::keywords::file_name=LOG_FILE, bl::keywords::target_file_name = LOG_FILE, bl::keywords::format = " [%TimeStamp%] [%Severity%]: %Message%" , bl::keywords::auto_flush = true );
326
+ std::string log_filename = " spinwalk_" + std::to_string (device_id) + " .log" ;
327
+ auto fileSink = bl::add_file_log (bl::keywords::file_name=log_filename, bl::keywords::target_file_name = log_filename, bl::keywords::format = " [%TimeStamp%] [%Severity%]: %Message%" , bl::keywords::auto_flush = true );
334
328
bl::add_common_attributes ();
335
329
336
330
// ========== print help ==========
@@ -341,7 +335,7 @@ int main(int argc, char * argv[])
341
335
return 0 ;
342
336
}
343
337
344
- std::cout << " Log file location: " << std::filesystem::current_path () / LOG_FILE << ' \n ' ;
338
+ std::cout << " Log file location: " << std::filesystem::current_path () / log_filename << ' \n ' ;
345
339
346
340
// ========== generate phantom ==========
347
341
if (vm.count (" cylinder" ))
@@ -360,7 +354,7 @@ int main(int argc, char * argv[])
360
354
return 0 ;
361
355
362
356
std::cout << " Running simulation for " << config_files.size () << " config(s)..." << " \n\n " ;
363
- auto start = std::chrono::steady_clock ::now ();
357
+ auto start = std::chrono::high_resolution_clock ::now ();
364
358
for (const auto & cfile : config_files)
365
359
{
366
360
std::cout << " <" << std::filesystem::path (cfile).filename ().string () << " >\n " ;
@@ -372,7 +366,7 @@ int main(int argc, char * argv[])
372
366
373
367
std::vector<double > fov_scale;
374
368
simulation_parameters param;
375
- param.no_gpu = vm.count (" no_gpu " ) > 0 ;
369
+ param.no_gpu = vm.count (" use_cpu " ) > 0 ;
376
370
377
371
// ========== read config file ==========
378
372
bStatus &= file_utils::read_config (cfile, ¶m, fov_scale, filenames);
@@ -385,18 +379,30 @@ int main(int argc, char * argv[])
385
379
386
380
if (bStatus == false )
387
381
{
388
- std::cout << ERR_MSG << " Simulation failed. See the log file " << LOG_FILE <<" , Aborting...!" << std::endl;
382
+ std::cout << ERR_MSG << " Simulation failed. See the log file " << log_filename <<" , Aborting...!" << std::endl;
389
383
return 1 ;
390
384
}
391
385
386
+ #ifdef __CUDACC__
387
+ if (param.no_gpu == false )
388
+ {
389
+ if (device_id >= getDeviceCount ())
390
+ {
391
+ std::cout << ERR_MSG << " Device ID " << device_id << " is not available! Aborting...!" << std::endl;
392
+ return 1 ;
393
+ }
394
+ cudaSetDevice (device_id);
395
+ }
396
+ #endif
397
+
392
398
std::cout << " Simulation starts..." << std::endl;
393
399
if (run (param, filenames, fov_scale) == false )
394
400
{
395
- std::cout << ERR_MSG << " Simulation failed. See the log file " << LOG_FILE <<" , Aborting...!" << std::endl;
401
+ std::cout << ERR_MSG << " Simulation failed. See the log file " << log_filename <<" , Aborting...!" << std::endl;
396
402
return 1 ;
397
403
}
398
404
std::cout << " \n " ;
399
405
}
400
- std::cout << " Simulation(s) finished successfully! Total elapsed time = " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock ::now () - start).count ()/1000 . << " second(s)." << std::endl;
406
+ std::cout << " Simulation(s) finished successfully! Total elapsed time = " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock ::now () - start).count ()/1000 . << " second(s)." << std::endl;
401
407
return 0 ;
402
408
}
0 commit comments