Skip to content

Commit efad252

Browse files
committed
macro for cuda
1 parent e0b9097 commit efad252

File tree

3 files changed

+47
-41
lines changed

3 files changed

+47
-41
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ if(CUDA_FOUND)
2626
# set(CUDA_ARCH "-arch=sm_75" CACHE STRING "CUDA architecture")
2727
# set_property(TARGET ${target} PROPERTY CUDA_ARCHITECTURES native)
2828
# set_property(TARGET ${target} PROPERTY CUDA_ARCHITECTURES 70 75 80 86 90)
29-
set(CMAKE_CUDA_ARCHITECTURES 70 72 75 86 89 90)
29+
set(CMAKE_CUDA_ARCHITECTURES 75 86 89)
3030
# set_property(TARGET ${target} PROPERTY CUDA_SEPARABLE_COMPILATION ON)
3131
# Add CUDA and OpenMP flags
3232
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CUDA_ARCH} -Xcompiler=${OpenMP_CXX_FLAGS}")

src/spinwalk.cu

Lines changed: 45 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -34,27 +34,27 @@
3434
#include <thrust/device_vector.h>
3535
#include <thrust/copy.h>
3636
#include <thrust/iterator/constant_iterator.h>
37-
#else
38-
#define checkCudaErrors(x) {}
3937
#endif
4038

4139
#define THREADS_PER_BLOCK 64
42-
#define LOG_FILE "spinwalk.log"
4340

4441
namespace bl = boost::log;
42+
using namespace indicators;
4543

4644
bool run(simulation_parameters param, std::map<std::string, std::vector<std::string> > filenames, std::vector<double> fov_scale)
4745
{
46+
auto start_config = std::chrono::high_resolution_clock::now();
47+
int64_t old_elapsed = 0;
4848
// ========== checking number of GPU(s) ==========
4949
int32_t device_count=1;
50+
#ifdef __CUDACC__
5051
checkCudaErrors(cudaGetDeviceCount(&device_count));
5152
BOOST_LOG_TRIVIAL(info) << "Number of available GPU(s): " << device_count;
52-
53+
#endif
5354
// param.n_spins /= device_count; // spins would be distributed across multiple GPUs (if there are any). We assume n_spins is divisible by device_count
5455
size_t numBlocks = (param.n_spins + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
5556

5657
// ========== allocate memory on CPU ==========
57-
auto st = std::chrono::steady_clock::now();
5858
size_t trj = param.enRecordTrajectory ? param.n_timepoints * (param.n_dummy_scan + 1) : 1;
5959
size_t len0 = 3 * param.n_spins;
6060
size_t len1 = len0 * param.n_fov_scale * trj;
@@ -68,7 +68,6 @@ bool run(simulation_parameters param, std::map<std::string, std::vector<std::str
6868
std::vector<float> M0(len0, 0.f); // memory layout(row-major): [n_spins x 3]
6969
std::vector<float> M1(len2, 0.f); // memory layout(row-major): [n_fov_scale x n_spins x n_TE x 3]
7070
std::vector<uint8_t> T(M1.size()/3, 0); // memory layout(row-major): [n_fov_scale x n_spins x n_TE x 1]
71-
BOOST_LOG_TRIVIAL(info) << "Memory allocation (CPU) took " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - st).count() << " ms";
7271

7372
// ========== allocate memory on GPU ==========
7473
#ifdef __CUDACC__
@@ -84,11 +83,6 @@ bool run(simulation_parameters param, std::map<std::string, std::vector<std::str
8483
checkCudaErrors(cudaMalloc(&d_param, sizeof(simulation_parameters)));
8584
if(param.fieldmap_exist)
8685
d_pFieldMap.resize(fieldmap.size());
87-
// ==========^==========
88-
cudaEvent_t start;
89-
cudaEvent_t end;
90-
checkCudaErrors(cudaEventCreate(&start));
91-
checkCudaErrors(cudaEventCreate(&end));
9286
#endif
9387

9488
for (int16_t fieldmap_no=0; fieldmap_no<param.n_fieldmaps; fieldmap_no++)
@@ -99,9 +93,9 @@ bool run(simulation_parameters param, std::map<std::string, std::vector<std::str
9993
return false;
10094
param.matrix_length = mask.size(); // update the matrix length based on the mask size from the recent read
10195
#ifdef __CUDACC__
102-
if(d_pMask.size() != mask.size())
96+
if(d_pMask.size() != mask.size() && param.no_gpu == false)
10397
d_pMask.resize(mask.size());
104-
if(d_pFieldMap.size() != fieldmap.size() && param.fieldmap_exist)
98+
if(d_pFieldMap.size() != fieldmap.size() && param.fieldmap_exist && param.no_gpu == false)
10599
d_pFieldMap.resize(fieldmap.size());
106100
#endif
107101
// convert fieldmap from T to degree per timestep
@@ -163,10 +157,8 @@ bool run(simulation_parameters param, std::map<std::string, std::vector<std::str
163157
d_pFieldMap = fieldmap;
164158
}
165159
#endif
166-
// ========== run ==========
167-
checkCudaErrors(cudaEventRecord(start));
168-
// tqdm bar;
169-
using namespace indicators;
160+
// ========== run ==========
161+
auto start_sim = std::chrono::high_resolution_clock::now();
170162
ProgressBar bar{option::ShowPercentage{true}, option::Start{"["}, option::Fill{"="}, option::Lead{">"}, option::End{"]"}};
171163
simulation_parameters param_local;
172164
memcpy(&param_local, &param, sizeof(simulation_parameters));
@@ -204,9 +196,9 @@ bool run(simulation_parameters param, std::map<std::string, std::vector<std::str
204196
T.data() + param.n_TE*param.n_spins*sl,
205197
spin);});
206198
}
199+
#ifdef __CUDACC__
207200
else
208201
{
209-
#ifdef __CUDACC__
210202
BOOST_LOG_TRIVIAL(info) << "GPU " << 1 << ") Fieldmap " << fieldmap_no << ", simulating sample length scale " << fov_scale[sl];
211203
checkCudaErrors(cudaMemcpy(d_param, &param_local, sizeof(simulation_parameters), cudaMemcpyHostToDevice));
212204
// scale positions to mimic different volume sizes
@@ -227,19 +219,19 @@ bool run(simulation_parameters param, std::map<std::string, std::vector<std::str
227219
thrust::copy(d_XYZ1.begin(), d_XYZ1.end(), XYZ1.begin() + shift);
228220
shift = param.n_TE*param.n_spins*sl;
229221
thrust::copy(d_T.begin(), d_T.end(), T.begin() + shift);
230-
#endif
231222
}
223+
#endif
232224
// bar.progress(sl, param.n_fov_scale);
233225
bar.set_progress(100 * (sl+1)/float(param.n_fov_scale));
234226
}
235-
// bar.finish();
227+
228+
auto end_config = std::chrono::high_resolution_clock::now();
229+
auto elapsed_sim = std::chrono::duration_cast<std::chrono::milliseconds>(end_config - start_sim).count() / 1000.0;
230+
auto elapsed_config = std::chrono::duration_cast<std::chrono::seconds>(end_config - start_config).count();
231+
int precision = elapsed_sim>10 ? 0 : (elapsed_sim > 1 ? 1 : 3);
232+
std::cout << "Simulation took " << std::fixed << std::setprecision(precision) << elapsed_sim << " sec., everything else took " << elapsed_config - elapsed_sim - old_elapsed<< " sec.\n";
233+
old_elapsed = elapsed_config;
236234

237-
float elapsedTime;
238-
checkCudaErrors(cudaEventRecord(end));
239-
checkCudaErrors(cudaEventSynchronize(end));
240-
checkCudaErrors(cudaDeviceSynchronize());
241-
checkCudaErrors(cudaEventElapsedTime(&elapsedTime, start, end));
242-
std::cout << "Simulation over " << 1 << " GPU(s) took " << std::fixed << std::setprecision(2) << elapsedTime/1000. << " second(s)" << '\n';
243235
// ========== save results ==========
244236
std::cout << "Saving the results to disk." << "\n";
245237
std::string f = filenames.at("output")[fieldmap_no];
@@ -262,11 +254,11 @@ bool run(simulation_parameters param, std::map<std::string, std::vector<std::str
262254
file_utils::save_h5(f, fov_scale.data(), dims, "scales", "double");
263255
}
264256

265-
// ========== clean up GPU ==========
257+
#ifdef __CUDACC__
258+
// ============ clean up GPU ============
266259
checkCudaErrors(cudaFree(d_param));
267-
checkCudaErrors(cudaEventDestroy(start));
268-
checkCudaErrors(cudaEventDestroy(end));
269-
260+
#endif
261+
270262
return true;
271263
}
272264

@@ -294,7 +286,7 @@ int main(int argc, char * argv[])
294286
bool bStatus = true;
295287
std::string phantom_output;
296288
float phantom_radius, phantom_fov, phantom_dchi, phantom_oxy_level, phantom_orientation, phantom_BVF;
297-
int32_t phantom_resolution, phantom_seed;
289+
int32_t phantom_resolution, phantom_seed, device_id = 0;
298290
std::vector<std::string> config_files;
299291
print_logo();
300292
// ========== parse command line arguments ==========
@@ -304,7 +296,8 @@ int main(int argc, char * argv[])
304296
("help,h", "help message (this menu)")
305297
("configs,c", po::value<std::vector<std::string>>(&config_files)->multitoken(), "config. files as many as you want. e.g. -c config1.ini config2.ini ... configN.ini")
306298
#ifdef __CUDACC__
307-
("no_gpu,g", "only run on CPU(s) (default: GPU)")
299+
("use_cpu,p", "only run on CPU (default: GPU)")
300+
("device,g",po::value<int32_t>(&device_id)->default_value(0), "select GPU device (if there are multiple GPUs)")
308301
#endif
309302
("cylinder,C", "generate phantom filled with cylinders")
310303
("sphere,S", "generate phantom filled with spheres")
@@ -330,7 +323,8 @@ int main(int argc, char * argv[])
330323
return 1;
331324
}
332325

333-
auto fileSink = bl::add_file_log(bl::keywords::file_name=LOG_FILE, bl::keywords::target_file_name = LOG_FILE, bl::keywords::format = "[%TimeStamp%] [%Severity%]: %Message%", bl::keywords::auto_flush = true);
326+
std::string log_filename = "spinwalk_" + std::to_string(device_id) + ".log";
327+
auto fileSink = bl::add_file_log(bl::keywords::file_name=log_filename, bl::keywords::target_file_name = log_filename, bl::keywords::format = "[%TimeStamp%] [%Severity%]: %Message%", bl::keywords::auto_flush = true);
334328
bl::add_common_attributes();
335329

336330
// ========== print help ==========
@@ -341,7 +335,7 @@ int main(int argc, char * argv[])
341335
return 0;
342336
}
343337

344-
std::cout << "Log file location: " << std::filesystem::current_path() / LOG_FILE << '\n';
338+
std::cout << "Log file location: " << std::filesystem::current_path() / log_filename << '\n';
345339

346340
// ========== generate phantom ==========
347341
if (vm.count("cylinder"))
@@ -360,7 +354,7 @@ int main(int argc, char * argv[])
360354
return 0;
361355

362356
std::cout << "Running simulation for " << config_files.size() << " config(s)..." << "\n\n";
363-
auto start = std::chrono::steady_clock::now();
357+
auto start = std::chrono::high_resolution_clock::now();
364358
for(const auto& cfile : config_files)
365359
{
366360
std::cout << "<" << std::filesystem::path(cfile).filename().string() << ">\n";
@@ -372,7 +366,7 @@ int main(int argc, char * argv[])
372366

373367
std::vector<double> fov_scale;
374368
simulation_parameters param;
375-
param.no_gpu = vm.count("no_gpu") > 0;
369+
param.no_gpu = vm.count("use_cpu") > 0;
376370

377371
// ========== read config file ==========
378372
bStatus &= file_utils::read_config(cfile, &param, fov_scale, filenames);
@@ -385,18 +379,30 @@ int main(int argc, char * argv[])
385379

386380
if (bStatus == false)
387381
{
388-
std::cout << ERR_MSG << "Simulation failed. See the log file " << LOG_FILE <<", Aborting...!" << std::endl;
382+
std::cout << ERR_MSG << "Simulation failed. See the log file " << log_filename <<", Aborting...!" << std::endl;
389383
return 1;
390384
}
391385

386+
#ifdef __CUDACC__
387+
if(param.no_gpu == false)
388+
{
389+
if (device_id >= getDeviceCount())
390+
{
391+
std::cout << ERR_MSG << "Device ID " << device_id << " is not available! Aborting...!" << std::endl;
392+
return 1;
393+
}
394+
cudaSetDevice(device_id);
395+
}
396+
#endif
397+
392398
std::cout << "Simulation starts..." << std::endl;
393399
if(run(param, filenames, fov_scale) == false)
394400
{
395-
std::cout << ERR_MSG << "Simulation failed. See the log file " << LOG_FILE <<", Aborting...!" << std::endl;
401+
std::cout << ERR_MSG << "Simulation failed. See the log file " << log_filename <<", Aborting...!" << std::endl;
396402
return 1;
397403
}
398404
std::cout << "\n";
399405
}
400-
std::cout << "Simulation(s) finished successfully! Total elapsed time = " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count()/1000. << " second(s)." << std::endl;
406+
std::cout << "Simulation(s) finished successfully! Total elapsed time = " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - start).count()/1000. << " second(s)." << std::endl;
401407
return 0;
402408
}

src/version.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
#define SPINWALK_VERSION_MAJOR 1
88
#define SPINWALK_VERSION_MINOR 13
9-
#define SPINWALK_VERSION_PATCH 7
9+
#define SPINWALK_VERSION_PATCH 8
1010

1111
//---------------------------------------------------------------------------------------------
1212
//

0 commit comments

Comments
 (0)