From bb767a280efa62b9e7b6a8bf3765194e1c93cd25 Mon Sep 17 00:00:00 2001 From: Rafael Sachetto Date: Thu, 6 Jun 2024 11:54:54 -0300 Subject: [PATCH] Simple eikonal solver implementation based on https://github.com/SCIInstitute/StructuredEikonal --- bsbash/find_functions.sh | 12 +- build.sh | 4 +- .../plain_mesh_no_fibrosis_example.ini | 10 +- src/common_types/common_types.h | 42 +- src/eikonal/build.sh | 10 + src/eikonal/common_def.h | 60 +++ src/eikonal/cuda_fim.cu | 226 +++++++++ src/eikonal/cuda_fim.h | 21 + src/eikonal/cuda_fim_kernel.cu | 424 +++++++++++++++++ src/eikonal/cuda_fim_kernel.h | 41 ++ src/eikonal/eikonal_solver.c | 427 ++++++++++++++++++ src/eikonal/eikonal_solver.h | 26 ++ src/logger/logger.h | 2 + src/main_eikonal.c | 333 ++------------ src/utils/build.sh | 4 +- src/utils/file_utils.h | 2 - src/utils/heap.c | 90 ---- src/utils/heap.h | 18 - 18 files changed, 1340 insertions(+), 412 deletions(-) create mode 100644 src/eikonal/build.sh create mode 100644 src/eikonal/common_def.h create mode 100644 src/eikonal/cuda_fim.cu create mode 100644 src/eikonal/cuda_fim.h create mode 100644 src/eikonal/cuda_fim_kernel.cu create mode 100644 src/eikonal/cuda_fim_kernel.h create mode 100644 src/eikonal/eikonal_solver.c create mode 100644 src/eikonal/eikonal_solver.h delete mode 100644 src/utils/heap.c delete mode 100644 src/utils/heap.h diff --git a/bsbash/find_functions.sh b/bsbash/find_functions.sh index 1d0b892d..3a0d17bb 100644 --- a/bsbash/find_functions.sh +++ b/bsbash/find_functions.sh @@ -8,8 +8,14 @@ FIND_CUDA () { NVCC="" CUDA_FOUND="" + LD_CONFIG=ldconfig + + if [ "$OS" == "openSUSE Tumbleweed" ]; then + LD_CONFIG=/sbin/ldconfig + fi + if [ -z "$CUDA_LIBRARY_PATH" ]; then - CUDA_LIBRARY_PATH=$(dirname "$(ldconfig -p | grep libcudart | awk '{print $4}' | head -n 1 | head -c -5)" 2> /dev/null) + CUDA_LIBRARY_PATH=$(dirname "$($LD_CONFIG -p | grep libcudart | awk '{print $4}' | head -n 1 | head -c -5)" 2> /dev/null) fi if [ -z "$CUDA_INCLUDE_PATH" ]; then @@ -19,6 +25,8 @@ FIND_CUDA () { elif [ "$OS" == "Fedora" ]; then CUDA_INCLUDE_PATH="/usr/local/cuda/include" CUDA_LIBRARY_PATH="/usr/local/cuda/lib64" + elif [ "$OS" == "openSUSE Tumbleweed" ]; then + CUDA_INCLUDE_PATH="/usr/local/cuda/include" else if [ "$CI" = true ]; then CUDA_INCLUDE_PATH='/usr/local/cuda/include' @@ -120,7 +128,7 @@ FIND_MPI () { } FIND_AMGX() { - AMGX_LIBRARIES="" + AMGX_LIBRARIES="amgxsh" AMGX_LIBRARY_PATH="" AMGX_INCLUDE_PATH="" AMGX_FOUND="" diff --git a/build.sh b/build.sh index b2cbfa95..ae1d54a9 100755 --- a/build.sh +++ b/build.sh @@ -185,6 +185,7 @@ ADD_SUBDIRECTORY "src/3dparty/tinyexpr" ADD_SUBDIRECTORY "src/3dparty/miniz" ADD_SUBDIRECTORY "src/vtk_utils" ADD_SUBDIRECTORY "src/ensight_utils" +ADD_SUBDIRECTORY "src/eikonal/" #DINAMIC DEPS @@ -265,7 +266,6 @@ if [ -n "$COMPILE_MPI" ]; then COMPILE_EXECUTABLE "MonoAlg3D_batch" "$SRC_FILES" "$HDR_FILES" "$STATIC_DEPS" "$DYNAMIC_DEPS" "$EXECUTABLES_LIBRARY_PATH $EXTRA_LIB_PATH" "$INCLUDE_P -I$MPI_INCLUDE_PATH" fi - fi fi @@ -296,7 +296,7 @@ if [ -n "$COMPILE_CLIP" ]; then fi if [ -n "$COMPILE_EIKONAL" ]; then - COMPILE_EXECUTABLE "MonoAlg3D_eikonal_solver" "src/main_eikonal.c" "" "$STATIC_DEPS" "$DYNAMIC_DEPS" "$EXECUTABLES_LIBRARY_PATH $EXTRA_LIB_PATH" + COMPILE_EXECUTABLE "MonoAlg3D_eikonal_solver" "src/main_eikonal.c" "" "$STATIC_DEPS" "$DYNAMIC_DEPS eikonal_solver" "$EXECUTABLES_LIBRARY_PATH $EXTRA_LIB_PATH" fi diff --git a/example_configs/plain_mesh_no_fibrosis_example.ini b/example_configs/plain_mesh_no_fibrosis_example.ini index 1a4d7472..58098472 100644 --- a/example_configs/plain_mesh_no_fibrosis_example.ini +++ b/example_configs/plain_mesh_no_fibrosis_example.ini @@ -23,7 +23,7 @@ file_prefix=V [assembly_matrix] init_function=set_initial_conditions_fvm sigma_x=0.0000176 -sigma_y=0.0001334 +sigma_y=0.0000176 sigma_z=0.0000176 library_file=shared_libs/libdefault_matrix_assembly.so main_function=homogeneous_sigma_assembly_matrix @@ -65,7 +65,9 @@ start = 0.0 duration = 2.0 period = 250.0 current = -50.0 -x_limit = 100.0 -main_function=stim_if_x_less_than +min_x = 0.0 +max_x = 400.0 +min_y = 0.0 +max_y = 400.0 +main_function=stim_x_y_limits ;//////////////////////////////////////////////// - diff --git a/src/common_types/common_types.h b/src/common_types/common_types.h index 09f6049f..d55fb782 100644 --- a/src/common_types/common_types.h +++ b/src/common_types/common_types.h @@ -1,9 +1,9 @@ #ifndef MONOALG3D_COMMON_TYPES_H #define MONOALG3D_COMMON_TYPES_H +#include "../3dparty/sds/sds.h" #include #include -#include "../3dparty/sds/sds.h" #define Pragma(x) _Pragma(#x) #define OMP(directive) Pragma(omp directive) @@ -17,6 +17,42 @@ typedef double real; typedef float real; #endif +#define EPS (real)1e-16 + +#define ALLOCATE_3D_ARRAY(array, type, size_x, size_y, size_z) \ + do { \ + array = (type ***)malloc(size_x * sizeof(type **)); \ + \ + if(array == NULL) { \ + log_error_and_exit("Memory allocation failed for first dimension\n"); \ + } \ + \ + for(int i = 0; i < size_x; ++i) { \ + array[i] = (type **)malloc(size_y * sizeof(type *)); \ + if(array[i] == NULL) { \ + log_error_and_exit("Memory allocation failed for second dimension\n"); \ + } \ + \ + for(int j = 0; j < size_y; ++j) { \ + array[i][j] = (type *)calloc(size_z, sizeof(type)); \ + if(array[i][j] == NULL) { \ + log_error_and_exit("Memory allocation failed for third dimension\n"); \ + } \ + } \ + } \ + } while(0) + +#define FREE_3D_ARRAY(array, size_x, size_y) \ + do { \ + for(int i = 0; i < size_x; ++i) { \ + for(int j = 0; j < size_y; ++j) { \ + free(array[i][j]); \ + } \ + free(array[i]); \ + } \ + free(array); \ + } while(0) + #define MALLOC_BYTES(type, bytes) (type *)malloc(bytes) #define MALLOC_ONE_TYPE(type) (type *)malloc(sizeof(type)) #define MALLOC_ARRAY_OF_TYPE(type, n) (type *)malloc(sizeof(type) * (n)) @@ -142,8 +178,8 @@ struct simulation_files { #define STRING_HASH_PRINT_KEY_VALUE_LOG(tag, d) \ do { \ for(int64_t __i = 0; __i < shlen(d); __i++) { \ - struct string_hash_entry __e = (d)[__i]; \ - log_info("%s %s = %s\n", tag, __e.key, __e.value); \ + struct string_hash_entry __e = (d)[__i]; \ + log_info("%s %s = %s\n", tag, __e.key, __e.value); \ } \ } while(0) diff --git a/src/eikonal/build.sh b/src/eikonal/build.sh new file mode 100644 index 00000000..f3162177 --- /dev/null +++ b/src/eikonal/build.sh @@ -0,0 +1,10 @@ +if [ -n "$CUDA_FOUND" ]; then + + EIKONAL_SOURCE_FILES="eikonal_solver.c cuda_fim.cu cuda_fim_kernel.cu" + EIKONAL_HEADER_FILES="eikonal_solver.h common_def.h cuda_fim.h cuda_fim_kernel.h" + EIKONAL_EXTRA_LIB_PATH=$CUDA_LIBRARY_PATH + EIKONAL_DYNAMIC_LIBS="c cudart" + + COMPILE_SHARED_LIB "eikonal_solver" "$EIKONAL_SOURCE_FILES" "$EIKONAL_HEADER_FILES" "" "$EIKONAL_DYNAMIC_LIBS" "$EIKONAL_EXTRA_LIB_PATH" "" "$CUDA_FOUND" + +fi diff --git a/src/eikonal/common_def.h b/src/eikonal/common_def.h new file mode 100644 index 00000000..326a16c4 --- /dev/null +++ b/src/eikonal/common_def.h @@ -0,0 +1,60 @@ +// +// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations +// +// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr) +// +// 2016. 2. 4 +// + +// +// Common to entire project +// + +#ifndef __COMMON_DEF_H__ +#define __COMMON_DEF_H__ + +#include +#include "../common_types/common_types.h" + +// +// common definition for Eikonal solvers +// +#ifndef INF +#define INF 1e20//FLT_MAX // +#endif + +#define BLOCK_LENGTH 4 + +// +// itk image volume definition for 3D anisotropic eikonal solvers +// +typedef unsigned int uint; +typedef unsigned char uchar; + +struct CUDA_MEM_STRUCTURE { + // volsize/blksize : # of pixel in volume/block + // blknum : # of block + // blklength : # of pixel in one dimemsion of block + uint nActiveBlock, blknum, volsize, blksize; + int xdim, ydim, zdim, nIter, blklength; // new new x,y,z dim to aligh power of 4 + + // host memory + uint *h_list; + bool *h_listVol, *h_listed; + + // device memory + uint *d_list; + real *d_spd; + bool *d_mask, *d_listVol, *d_con; + + real *h_sol;//h_speedtable[256]; + real *d_sol, *t_sol; + + // GroupOrder + int* blockOrder; + int K; +}; + +typedef struct CUDA_MEM_STRUCTURE CUDAMEMSTRUCT; + +#endif diff --git a/src/eikonal/cuda_fim.cu b/src/eikonal/cuda_fim.cu new file mode 100644 index 00000000..901c088c --- /dev/null +++ b/src/eikonal/cuda_fim.cu @@ -0,0 +1,226 @@ +// +// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations +// +// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr) +// +// 2016. 2. 4 +// + +#include +#include +#include "cuda_fim_kernel.h" +#include "cuda_fim.h" +#include "../gpu_utils/gpu_utils.h" +#include + +void run_eikonal_solver_simple(CUDAMEMSTRUCT *cmem, bool verbose) { + + int deviceID; + cudaGetDevice(&deviceID); + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, deviceID); + if (verbose) { + printf("Current device id : %d, name : %s\n", deviceID, deviceProp.name); + } + + int xdim, ydim, zdim; + xdim = cmem->xdim; + ydim = cmem->ydim; + zdim = cmem->zdim; + + // create volumes + uint volSize = cmem->volsize; + uint blockNum = cmem->blknum; + + if (verbose) { + printf("# of total voxels : %d\n", volSize); + printf("# of total blocks : %d\n", blockNum); + } + + // h_ : host memory, d_ : device memory + int nIter = cmem->nIter; + uint nActiveBlock = cmem->nActiveBlock; // active list + + real*d_spd = cmem->d_spd; + real *d_sol = cmem->d_sol; + real *t_sol = cmem->t_sol; + + uint *d_list = cmem->d_list; + bool *d_listVol = cmem->d_listVol; + + bool *d_con = cmem->d_con; + bool *d_mask = cmem->d_mask; + + // copy so that original value should not be modified + uint *h_list = (uint*) malloc(blockNum*sizeof(uint)); + bool *h_listed = (bool*) malloc(blockNum*sizeof(bool)); + bool *h_listVol = (bool*) malloc(blockNum*sizeof(bool)); + + // initialization + memcpy(h_list, cmem->h_list, blockNum*sizeof(uint)); + memcpy(h_listed, cmem->h_listed, blockNum*sizeof(bool)); + memcpy(h_listVol, cmem->h_listVol, blockNum*sizeof(bool)); + + check_cuda_error( cudaMemcpy(cmem->d_list, cmem->h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) ); + check_cuda_error( cudaMemcpy(cmem->d_listVol, cmem->h_listVol, blockNum*sizeof(bool), cudaMemcpyHostToDevice) ); + check_cuda_error( cudaMemcpy(cmem->d_sol, cmem->h_sol, volSize*sizeof(real), cudaMemcpyHostToDevice) ); + check_cuda_error( cudaMemcpy(cmem->t_sol, cmem->h_sol, volSize*sizeof(real), cudaMemcpyHostToDevice) ); + check_cuda_error( cudaMemset(cmem->d_con, 1, volSize*sizeof(bool)) ); + + // set dimension of block and entire grid size + dim3 dimBlock(BLOCK_LENGTH,BLOCK_LENGTH,BLOCK_LENGTH); + dim3 dimEntireGrid(blockNum); + dim3 dimGrid(nActiveBlock); + + int nTotalIter = 0; + + uint nTotalBlockProcessed = 0; + + // start solver + while(nActiveBlock > 0) + { + assert(nActiveBlock < 4294967295); + + nTotalBlockProcessed += nActiveBlock; + + nTotalIter++; + + // + // solve current blocks in the active lists + // + + // printf("# of active tiles : %u\n", nActiveBlock); + if (verbose) { + printf("# of active tiles : %u\n", nActiveBlock); + } + ////////////////////////////////////////////////////////////////// + // 1. run solver on current active tiles + + dimGrid.y = (unsigned int)floor(((double)nActiveBlock-1)/65535)+1; + dimGrid.x = (unsigned int)ceil ((double)nActiveBlock/(double)dimGrid.y); + + if (verbose) { + printf("Grid size : %d x %d\n", dimGrid.x, dimGrid.y); + } + + check_cuda_error( cudaMemcpy(d_list, h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) ); + + run_solver<<< dimGrid, dimBlock >>>(d_spd, d_mask, d_sol, t_sol, d_con, d_list, xdim, ydim, zdim, nIter, nActiveBlock); + + check_cuda_error(cudaGetLastError()); + + cudaDeviceSynchronize(); + + + ////////////////////////////////////////////////////////////////// + // 2. reduction (only active tiles) + + run_reduction<<< dimGrid, dim3(BLOCK_LENGTH,BLOCK_LENGTH,BLOCK_LENGTH/2) >>>(d_con, d_listVol, d_list, nActiveBlock); + + check_cuda_error(cudaGetLastError()); + + cudaDeviceSynchronize(); + + ////////////////////////////////////////////////////////////////// + // 3. check neighbor tiles of converged tile + // Add any active block of neighbor of converged block is inserted + // to the list + + check_cuda_error( cudaMemcpy(h_listVol, d_listVol, blockNum*sizeof(bool), cudaMemcpyDeviceToHost) ); + + uint nOldActiveBlock = nActiveBlock; + uint nBlkX = xdim/BLOCK_LENGTH; + uint nBlkY = ydim/BLOCK_LENGTH; + + for(uint i=0; i= blockNum) ? currBlkIdx : (currBlkIdx + nBlkX*nBlkY); //bt + nb[2] = (currBlkIdx < nBlkX) ? currBlkIdx : (currBlkIdx - nBlkX); //up + nb[3] = ((currBlkIdx + nBlkX) >= blockNum) ? currBlkIdx : (currBlkIdx + nBlkX); //dn + nb[4] = (currBlkIdx%nBlkX == 0) ? currBlkIdx : currBlkIdx-1; //lf + nb[5] = ((currBlkIdx+1)%nBlkX == 0) ? currBlkIdx : currBlkIdx+1; //rt + + for(int nbIdx = 0; nbIdx < 6; nbIdx++) + { + uint currIdx = nb[nbIdx]; + + if(!h_listed[currIdx]) + { + h_listed[currIdx] = true; + h_list[nActiveBlock++] = currIdx; + } + } + } + } + cudaDeviceSynchronize(); + + ////////////////////////////////////////////////////////////////// + // 4. run solver only once for neighbor blocks of converged block + // current active list contains active blocks and neighbor blocks of + // any converged blocks. + // + + // update grid dimension because nActiveBlock is changed + dimGrid.y = (unsigned int)floor(((double)nActiveBlock-1)/65535)+1; + dimGrid.x = (unsigned int)ceil((double)nActiveBlock/(double)dimGrid.y); + + if (verbose) { + printf("Grid size : %d x %d\n", dimGrid.x, dimGrid.y); + } + + check_cuda_error(cudaMemcpy(d_list, h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) ); + run_check_neighbor<<< dimGrid, dimBlock >>>(d_spd, d_mask, t_sol, d_sol, d_con, d_list, xdim, ydim, zdim, nOldActiveBlock, nActiveBlock); + check_cuda_error(cudaGetLastError()); + cudaDeviceSynchronize(); + + + ////////////////////////////////////////////////////////////////// + // 5. reduction + + run_reduction<<< dimGrid, dim3(BLOCK_LENGTH,BLOCK_LENGTH,BLOCK_LENGTH/2) >>>(d_con, d_listVol, d_list, nActiveBlock); + check_cuda_error(cudaGetLastError()); + cudaDeviceSynchronize(); + + + ////////////////////////////////////////////////////////////////// + // 6. update active list + // read back active volume from the device and add + // active block to active list on the host memory + + + nActiveBlock = 0; + check_cuda_error( cudaMemcpy(h_listVol, d_listVol, blockNum*sizeof(bool), cudaMemcpyDeviceToHost) ); + + for(uint i=0; i b > c + if(a < b) { tmp = a; a = b; b = tmp; } + if(b < c) { tmp = b; b = c; c = tmp; } + if(a < b) { tmp = a; a = b; b = tmp; } + + ret = INF; + + if(c < INF) + { + ret = c + s; + + if(ret > b) + { + tmp = ((b+c) + sqrtf(2.0f*s*s-(b-c)*(b-c)))*0.5f; + + if(tmp > b) ret = tmp; + + if(ret > a) { + tmp = (a+b+c)/3.0f + sqrtf(2.0f*(a*(b-a)+b*(c-b)+c*(a-c))+3.0f*s*s)/3.0f; + + if(tmp > a) ret = tmp; + } + } + } + + return ret; +} + +__global__ void run_solver(real* spd, bool* mask, const real *sol_in, real *sol_out, bool *con, uint* list, int xdim, int ydim, int zdim, int nIter, uint nActiveBlock) +{ + uint list_idx = blockIdx.y*gridDim.x + blockIdx.x; + + if(list_idx < nActiveBlock) + { + // retrieve actual block index from the active list + uint block_idx = list[list_idx]; + + real F; + bool isValid; + uint blocksize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH; + uint base_addr = block_idx*blocksize; + + uint xgridlength = xdim/BLOCK_LENGTH; + uint ygridlength = ydim/BLOCK_LENGTH; + uint zgridlength = zdim/BLOCK_LENGTH; + + // compute block index + uint bx = block_idx%xgridlength; + uint tmpIdx = (block_idx - bx)/xgridlength; + uint by = tmpIdx%ygridlength; + uint bz = (tmpIdx-by)/ygridlength; + + uint tx = threadIdx.x; + uint ty = threadIdx.y; + uint tz = threadIdx.z; + uint tIdx = tz*BLOCK_LENGTH*BLOCK_LENGTH + ty*BLOCK_LENGTH + tx; + + __shared__ real _sol[BLOCK_LENGTH+2][BLOCK_LENGTH+2][BLOCK_LENGTH+2]; + + // copy global to shared memory + dim3 idx(tx+1,ty+1,tz+1); + + SOL(idx.x,idx.y,idx.z) = sol_in[base_addr + tIdx]; + F = spd[base_addr + tIdx]; + if(F > 0) F = 1.0/F; // F = 1/f + isValid = mask[base_addr + tIdx]; + + uint new_base_addr, new_tIdx; + + // 1-neighborhood values + if(tx == 0) + { + if(bx == 0) // end of the grid + { + new_tIdx = tIdx; + new_base_addr = base_addr; + } + else + { + new_tIdx = tIdx + BLOCK_LENGTH-1; + new_base_addr = (block_idx - 1)*blocksize; + } + + SOL(tx,idx.y,idx.z) = sol_in[new_base_addr + new_tIdx]; + } + + if(tx == BLOCK_LENGTH-1) + { + if(bx == xgridlength-1) // end of the grid + { + new_tIdx = tIdx; + new_base_addr = base_addr; + } + else + { + new_tIdx = tIdx - (BLOCK_LENGTH-1); + new_base_addr = (block_idx + 1)*blocksize; + } + SOL(tx+2,idx.y,idx.z) = sol_in[new_base_addr + new_tIdx]; + } + + if(ty == 0) + { + if(by == 0) + { + new_tIdx = tIdx; + new_base_addr = base_addr; + } + else + { + new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH; + new_base_addr = (block_idx - xgridlength)*blocksize; + } + + SOL(idx.x,ty,idx.z) = sol_in[new_base_addr + new_tIdx]; + } + + if(ty == BLOCK_LENGTH-1) + { + if(by == ygridlength-1) + { + new_tIdx = tIdx; + new_base_addr = base_addr; + } + else + { + new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH; + new_base_addr = (block_idx + xgridlength)*blocksize; + } + + SOL(idx.x,ty+2,idx.z) = sol_in[new_base_addr + new_tIdx]; + } + + if(tz == 0) + { + if(bz == 0) + { + new_tIdx = tIdx; + new_base_addr = base_addr; + } + else + { + new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH; + new_base_addr = (block_idx - xgridlength*ygridlength)*blocksize; + } + + SOL(idx.x,idx.y,tz) = sol_in[new_base_addr + new_tIdx]; + } + + if(tz == BLOCK_LENGTH-1) + { + if(bz == zgridlength-1) + { + new_tIdx = tIdx; + new_base_addr = base_addr; + } + else + { + new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH; + new_base_addr = (block_idx + xgridlength*ygridlength)*blocksize; + } + + SOL(idx.x,idx.y,tz+2) = sol_in[new_base_addr + new_tIdx]; + } + + __syncthreads(); + + real a,b,c,oldT,newT; + + for(int iter=0; iter0; i/=2) + { + if(tIdx < i) + { + bool b1, b2; + b1 = conv[tIdx]; + b2 = conv[tIdx+i]; + conv[tIdx] = (b1 && b2) ? true : false ; + } + __syncthreads(); + } + + if(tIdx == 0) + { + listVol[block_idx] = !conv[0]; // active list is negation of tile convergence (active = not converged) + } + } +} + +__global__ void run_check_neighbor(real* spd, bool* mask, const real *sol_in, real *sol_out, + bool *con, uint* list, int xdim, int ydim, int zdim, + uint nActiveBlock, uint nTotalBlock) +{ + + uint list_idx = blockIdx.y*gridDim.x + blockIdx.x; + + if(list_idx < nTotalBlock) + { + real F; + bool isValid; + __shared__ real _sol[BLOCK_LENGTH+2][BLOCK_LENGTH+2][BLOCK_LENGTH+2]; + + uint block_idx = list[list_idx]; + uint blocksize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH; + uint base_addr = block_idx*blocksize; + + uint tx = threadIdx.x; + uint ty = threadIdx.y; + uint tz = threadIdx.z; + uint tIdx = tz*BLOCK_LENGTH*BLOCK_LENGTH + ty*BLOCK_LENGTH + tx; + + if(list_idx < nActiveBlock) // copy value + { + sol_out[base_addr + tIdx] = sol_in[base_addr + tIdx]; + } + else + { + uint xgridlength = xdim/BLOCK_LENGTH; + uint ygridlength = ydim/BLOCK_LENGTH; + uint zgridlength = zdim/BLOCK_LENGTH; + + // compute block index + uint bx = block_idx%xgridlength; + uint tmpIdx = (block_idx - bx)/xgridlength; + uint by = tmpIdx%ygridlength; + uint bz = (tmpIdx-by)/ygridlength; + + // copy global to shared memory + dim3 idx(tx+1,ty+1,tz+1); + _sol[idx.x][idx.y][idx.z] = sol_in[base_addr + tIdx]; + F = spd[base_addr + tIdx]; + if(F > 0) F = 1.0/F; + isValid = mask[base_addr + tIdx]; + + uint new_base_addr, new_tIdx; + + // 1-neighborhood values + if(tx == 0) + { + if(bx == 0) // end of the grid + { + new_tIdx = tIdx; + new_base_addr = base_addr; + } + else + { + new_tIdx = tIdx + BLOCK_LENGTH-1; + new_base_addr = (block_idx - 1)*blocksize; + } + _sol[tx][idx.y][idx.z] = sol_in[new_base_addr + new_tIdx]; + } + + if(tx == BLOCK_LENGTH-1) + { + if(bx == xgridlength-1) // end of the grid + { + new_tIdx = tIdx; + new_base_addr = base_addr; + } + else + { + new_tIdx = tIdx - (BLOCK_LENGTH-1); + new_base_addr = (block_idx + 1)*blocksize; + } + _sol[tx+2][idx.y][idx.z] = sol_in[new_base_addr + new_tIdx]; + } + + if(ty == 0) + { + if(by == 0) + { + new_tIdx = tIdx; + new_base_addr = base_addr; + } + else + { + new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH; + new_base_addr = (block_idx - xgridlength)*blocksize; + } + _sol[idx.x][ty][idx.z] = sol_in[new_base_addr + new_tIdx]; + } + + if(ty == BLOCK_LENGTH-1) + { + if(by == ygridlength-1) + { + new_tIdx = tIdx; + new_base_addr = base_addr; + } + else + { + new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH; + new_base_addr = (block_idx + xgridlength)*blocksize; + } + _sol[idx.x][ty+2][idx.z] = sol_in[new_base_addr + new_tIdx]; + } + + if(tz == 0) + { + if(bz == 0) + { + new_tIdx = tIdx; + new_base_addr = base_addr; + } + else + { + new_tIdx = tIdx + (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH; + new_base_addr = (block_idx - xgridlength*ygridlength)*blocksize; + } + _sol[idx.x][idx.y][tz] = sol_in[new_base_addr + new_tIdx]; + } + + if(tz == BLOCK_LENGTH-1) + { + if(bz == zgridlength-1) + { + new_tIdx = tIdx; + new_base_addr = base_addr; + } + else + { + new_tIdx = tIdx - (BLOCK_LENGTH-1)*BLOCK_LENGTH*BLOCK_LENGTH; + new_base_addr = (block_idx + xgridlength*ygridlength)*blocksize; + } + _sol[idx.x][idx.y][tz+2] = sol_in[new_base_addr + new_tIdx]; + } + + __syncthreads(); + + + real a,b,c,oldT,newT; + + // + // compute new value + // + oldT = newT = _sol[idx.x][idx.y][idx.z]; + + if(isValid) + { + a = min(_sol[tx][idx.y][idx.z],_sol[tx+2][idx.y][idx.z]); + b = min(_sol[idx.x][ty][idx.z],_sol[idx.x][ty+2][idx.z]); + c = min(_sol[idx.x][idx.y][tz],_sol[idx.x][idx.y][tz+2]); + + real tmp = (real) get_time_eikonal(a, b, c, F); + newT = min(tmp,oldT); + + sol_out[base_addr + tIdx] = newT; + } + // write back to global memory + real residue = oldT - newT; + con[base_addr + tIdx] = (residue < EPS) ? true : false; + } + } +} + + diff --git a/src/eikonal/cuda_fim_kernel.h b/src/eikonal/cuda_fim_kernel.h new file mode 100644 index 00000000..874b49a2 --- /dev/null +++ b/src/eikonal/cuda_fim_kernel.h @@ -0,0 +1,41 @@ +// +// CUDA implementation of FIM (Fast Iterative Method) for Eikonal equations +// +// Copyright (c) Won-Ki Jeong (wkjeong@unist.ac.kr) +// +// 2016. 2. 4 +// + +#ifndef _cuda_fim_KERNEL_H_ +#define _cuda_fim_KERNEL_H_ + +#include "common_def.h" + +#define MEM(index) _mem[index] +#define SOL(i,j,k) _sol[i][j][k] +#define SPD(i,j,k) _spd[i][j][k] + +__device__ real get_time_eikonal(real a, real b, real c, real s); +// +// F : Input speed (positive) +// if F =< 0, skip that pixel (masking out) +// +__global__ void run_solver(real* spd, bool* mask, const real *sol_in, + real *sol_out, bool *con, uint* list, int xdim, int ydim, int zdim, + int nIter, uint nActiveBlock); +// +// run_reduction +// +// con is pixelwise convergence. Do reduction on active tiles and write tile-wise +// convergence to listVol. The implementation assumes that the block size is 4x4x4. +// +__global__ void run_reduction(bool *con, bool *listVol, uint *list, uint nActiveBlock); +// +// if block is active block, copy values +// if block is neighbor, run solver once +// +__global__ void run_check_neighbor(real* spd, bool* mask, const real *sol_in, real *sol_out, + bool *con, uint* list, int xdim, int ydim, int zdim, + uint nActiveBlock, uint nTotalBlock); +#endif // #ifndef _cuda_fim_KERNEL_H_ + diff --git a/src/eikonal/eikonal_solver.c b/src/eikonal/eikonal_solver.c new file mode 100644 index 00000000..e4eed189 --- /dev/null +++ b/src/eikonal/eikonal_solver.c @@ -0,0 +1,427 @@ +#include +#include +#include "eikonal_solver.h" +#include "../gpu_utils/gpu_utils.h" +#include "../logger/logger.h" +#include +#include +#include +#include "../3dparty/stb_ds.h" +#include "cuda_fim.h" + +struct eikonal_solver * new_eikonal_solver(bool verbose) { + + struct eikonal_solver *solver = MALLOC_ONE_TYPE(struct eikonal_solver); + solver->verbose = verbose; + solver->cuda_mem_created = false; + solver->width = 256; + solver->height = 256; + solver->depth = 256; + solver->iters_per_block = 10; + solver->solver_type = 0; + solver->speeds = NULL; + solver->seeds = NULL; + solver->mask = NULL; + + return solver; +} + +void free_eikonal_solver(struct eikonal_solver *solver) { + + //TODO: free 3D arrays + FREE_3D_ARRAY(solver->speeds, solver->width, solver->height); + FREE_3D_ARRAY(solver->answer, solver->width, solver->height); + FREE_3D_ARRAY(solver->mask, solver->width, solver->height); + + if(solver->cuda_mem_created) { + free(solver->memory_struct.h_sol); + free(solver->memory_struct.h_list); + free(solver->memory_struct.h_listed); + free(solver->memory_struct.h_listVol); + free(solver->memory_struct.blockOrder); + check_cuda_error( cudaFree(solver->memory_struct.d_spd) ); + check_cuda_error( cudaFree(solver->memory_struct.d_sol) ); + check_cuda_error( cudaFree(solver->memory_struct.t_sol) ); // temp solution for ping-pong + check_cuda_error( cudaFree(solver->memory_struct.d_con) ); // convergence volume + check_cuda_error( cudaFree(solver->memory_struct.d_list) ); + check_cuda_error( cudaFree(solver->memory_struct.d_listVol) ); + check_cuda_error( cudaFree(solver->memory_struct.d_mask) ); + } + + free(solver); +} + +void check_cuda_memory(struct eikonal_solver *solver) { + if (solver->verbose) { + size_t free_mem, total_mem; + cudaMemGetInfo(&free_mem, &total_mem); + + printf("Total Memory : %lld MB\n", total_mem / (1024LL * 1024LL)); + printf("Free Memory : %lld MB\n", free_mem / (1024LL * 1024LL)); + printf("--\n"); + } +} + +void init_cuda_mem(struct eikonal_solver *solver) { + + if(solver->width <= 0 || solver->height <= 0 || solver->depth <= 0) { + log_error_and_exit("Volume dimension cannot be zero"); + } + + check_cuda_memory(solver); + + // 1. Create /initialize GPU memory + size_t nx, ny, nz; + + nx = solver->width + (BLOCK_LENGTH-solver->width%BLOCK_LENGTH)%BLOCK_LENGTH; + ny = solver->height + (BLOCK_LENGTH-solver->height%BLOCK_LENGTH)%BLOCK_LENGTH; + nz = solver->depth + (BLOCK_LENGTH-solver->depth%BLOCK_LENGTH)%BLOCK_LENGTH; + + if (solver->verbose) { + printf("%ld %ld %ld \n",nx,ny,nz); + } + + size_t volSize = nx*ny*nz; + size_t blkSize = BLOCK_LENGTH*BLOCK_LENGTH*BLOCK_LENGTH; + + size_t nBlkX = nx / BLOCK_LENGTH; + size_t nBlkY = ny / BLOCK_LENGTH; + size_t nBlkZ = nz / BLOCK_LENGTH; + size_t blockNum = nBlkX*nBlkY*nBlkZ; + + solver->memory_struct.xdim = (int) nx; + solver->memory_struct.ydim = (int) ny; + solver->memory_struct.zdim = (int) nz; + solver->memory_struct.volsize = (uint) volSize; + solver->memory_struct.blksize = (uint) blkSize; + solver->memory_struct.blklength = BLOCK_LENGTH; + solver->memory_struct.blknum = (uint) blockNum; + solver->memory_struct.nIter = (int) solver->iters_per_block; // iter per block + + if(solver->cuda_mem_created) // delete previous memory + { + free(solver->memory_struct.h_sol); + free(solver->memory_struct.h_list); + free(solver->memory_struct.h_listed); + free(solver->memory_struct.h_listVol); + free(solver->memory_struct.blockOrder); + check_cuda_error( cudaFree(solver->memory_struct.d_spd) ); + check_cuda_error( cudaFree(solver->memory_struct.d_sol) ); + check_cuda_error( cudaFree(solver->memory_struct.t_sol) ); // temp solution for ping-pong + check_cuda_error( cudaFree(solver->memory_struct.d_con) ); // convergence volume + check_cuda_error( cudaFree(solver->memory_struct.d_list) ); + check_cuda_error( cudaFree(solver->memory_struct.d_listVol) ); + check_cuda_error( cudaFree(solver->memory_struct.d_mask) ); + } + solver->cuda_mem_created = true; + + solver->memory_struct.h_sol = (real*) malloc(volSize*sizeof(real)); // initial solution + solver->memory_struct.h_list = (uint*) malloc(blockNum*sizeof(uint)); // linear list contains active block indices + solver->memory_struct.h_listed = (bool*) malloc(blockNum*sizeof(bool)); // whether block is added to the list + solver->memory_struct.h_listVol = (bool*) malloc(blockNum*sizeof(bool)); // volume list shows active/nonactive of corresponding block + solver->memory_struct.blockOrder = (int*) malloc(blockNum*sizeof(int)); + + check_cuda_memory(solver); + + // + // create host/device memory using CUDA mem functions + // + check_cuda_error( cudaMalloc((void**)&(solver->memory_struct.d_spd), volSize*sizeof(real)) ); + check_cuda_memory(solver); + + check_cuda_error( cudaMalloc((void**)&(solver->memory_struct.d_sol), volSize*sizeof(real)) ); + check_cuda_memory(solver); + + check_cuda_error( cudaMalloc((void**)&(solver->memory_struct.t_sol), volSize*sizeof(real)) ); // temp solution for ping-pong + check_cuda_memory(solver); + + check_cuda_error( cudaMalloc((void**)&(solver->memory_struct.d_con), volSize*sizeof(bool)) ); // convergence volume + check_cuda_memory(solver); + + check_cuda_error( cudaMalloc((void**)&(solver->memory_struct.d_list), blockNum*sizeof(uint)) ); + check_cuda_memory(solver); + + check_cuda_error( cudaMalloc((void**)&(solver->memory_struct.d_listVol), blockNum*sizeof(bool)) ); + check_cuda_memory(solver); + + check_cuda_error( cudaMalloc((void**)&(solver->memory_struct.d_mask), volSize*sizeof(bool)) ); + check_cuda_memory(solver); +} + + +void set_attribute_mask(struct eikonal_solver *solver) { + + assert(solver); + assert(solver->speeds); + assert(solver->mask); + + uint volSize = solver->memory_struct.volsize; + + int nx, ny, nz, blklength; + + nx = solver->memory_struct.xdim; + ny = solver->memory_struct.ydim; + nz = solver->memory_struct.zdim; + blklength = solver->memory_struct.blklength; + + // create host memory + real *h_spd = MALLOC_ARRAY_OF_TYPE(real, volSize); // byte speed, host + bool *h_mask = MALLOC_ARRAY_OF_TYPE(bool, volSize); + + // copy input volume to host memory + // make each block to be stored contiguously in 1D memory space + uint idx = 0; + for(int zStr = 0; zStr < nz; zStr += blklength) { + for(int yStr = 0; yStr < ny; yStr += blklength) { + for(int xStr = 0; xStr < nx; xStr += blklength) { + // for each block + for(int z = zStr; z < zStr + blklength; z++) { + for(int y = yStr; y < yStr + blklength; y++) { + for(int x = xStr; x < xStr+blklength; x++) { + h_spd[idx] = solver->speeds[x][y][z]; + h_mask[idx] = solver->mask[x][y][z]; + idx++; + } + } + } + } + } + } + + // initialize GPU memory with host memory + check_cuda_error( cudaMemcpy(solver->memory_struct.d_spd, h_spd, volSize*sizeof(real), cudaMemcpyHostToDevice) ); + check_cuda_error( cudaMemcpy(solver->memory_struct.d_mask, h_mask, volSize*sizeof(bool), cudaMemcpyHostToDevice) ); + + free(h_spd); + free(h_mask); +} + +static void init_attribute_mask(struct eikonal_solver *solver) { + + size_t size_x = solver->width; + size_t size_y = solver->height; + size_t size_z = solver->depth; + + ALLOCATE_3D_ARRAY(solver->mask, bool, size_x, size_y, size_z); + + size_t n_active = solver->num_active_cells; + struct cell_node **active_cells = solver->active_cells; + + for(size_t i = 0; i < n_active; i++) { + int x = (int) active_cells[i]->center.x; + int y = (int) active_cells[i]->center.y; + int z = (int) active_cells[i]->center.z; + solver->mask[x][y][z] = true; + } + +} + +static void initialization(struct eikonal_solver *solver) { + assert(solver); + check_cuda_memory(solver); + init_cuda_mem(solver); + init_attribute_mask(solver); + set_attribute_mask(solver); + check_cuda_memory(solver); +} + +static void get_solution(struct eikonal_solver *solver) { + // copy solution from GPU + check_cuda_error( cudaMemcpy(solver->memory_struct.h_sol, + solver->memory_struct.d_sol, + solver->memory_struct.volsize*sizeof(real), + cudaMemcpyDeviceToHost) ); + + size_t size_x = solver->width; + size_t size_y = solver->height; + size_t size_z = solver->depth; + + ALLOCATE_3D_ARRAY(solver->answer, real, size_x, size_y, size_z); + + for(size_t blockID = 0; blockID < solver->memory_struct.blknum; blockID++) { + size_t baseAddr = blockID * solver->memory_struct.blksize; + size_t xgridlength = solver->memory_struct.xdim/BLOCK_LENGTH; + size_t ygridlength = solver->memory_struct.ydim/BLOCK_LENGTH; + // compute block index + size_t bx = blockID%xgridlength; + size_t tmpIdx = (blockID - bx)/xgridlength; + size_t by = tmpIdx%ygridlength; + size_t bz = (tmpIdx-by)/ygridlength; + //translate back to real space + for(int k = 0; k < BLOCK_LENGTH; k++) { + for(int j = 0; j < BLOCK_LENGTH; j++) { + for(int i = 0; i < BLOCK_LENGTH; i++) { + double d = solver->memory_struct.h_sol[baseAddr + + k * BLOCK_LENGTH * BLOCK_LENGTH + + j * BLOCK_LENGTH + i]; + if ((i + bx * BLOCK_LENGTH) < solver->width && + (j + by * BLOCK_LENGTH) < solver->height && + (k + bz * BLOCK_LENGTH) < solver->depth) { + solver->answer[(i + bx * BLOCK_LENGTH)][(j + + by * BLOCK_LENGTH)][k + bz * BLOCK_LENGTH] = d; + } + } + } + } + } +} + +void map_generator(struct eikonal_solver *solver) { + + double pi = 3.141592653589793238462643383; + + size_t size_x = solver->width; + size_t size_y = solver->height; + size_t size_z = solver->depth; + + ALLOCATE_3D_ARRAY(solver->speeds, real, size_x, size_y, size_z); + + switch(solver->solver_type) { + case 0 : + //Constant Speed Map + for (int k = 0 ; k < solver->depth ; ++k) { + for (int j = 0 ; j < solver->height; ++j) { + for ( int i = 0 ; i < solver->width ; ++i) { + solver->speeds[i][j][k] = 1.0; + } + } + } + break; + case 1 : + //Sinusoid Speed Map + for (int k = 0 ; k < solver->depth ; ++k) { + for (int j = 0 ; j < solver->height; ++j) { + for ( int i = 0 ; i < solver->width ; ++i) { + solver->speeds[i][j][k] = + (6.0 + 5.0 *(sin((i*pi)/(double)solver->width * 2.0))* + sin((j*pi)/(double)solver->height*2.0)* + sin((k*pi)/(double)solver->depth*2.0)); + } + } + } + + break; + } +} + +void use_seeds(struct eikonal_solver *solver) { + + if (solver->verbose) { + printf("Loading seed volume...\n"); + } + uint volSize, blockNum; + int nx, ny, nz, blklength; + + nx = solver->memory_struct.xdim; + ny = solver->memory_struct.ydim; + nz = solver->memory_struct.zdim; + volSize = solver->memory_struct.volsize; + blklength = solver->memory_struct.blklength; + blockNum = solver->memory_struct.blknum; + + // copy input volume to host memory + // make each block to be stored contiguously in 1D memory space + uint idx = 0; + uint blk_idx = 0; + uint list_idx = 0; + uint nActiveBlock = 0; + + for(int zStr = 0; zStr < nz; zStr += blklength) { + for(int yStr = 0; yStr < ny; yStr += blklength) { + for(int xStr = 0; xStr < nx; xStr += blklength) { + // for each block + bool isSeedBlock = false; + + for(int z=zStr; zmemory_struct.h_sol[idx] = INF; + if (arrlen(solver->seeds) == 0) { + if (x == nx/2 && y == ny/2 && z == nz/2) { + solver->memory_struct.h_sol[idx] = 0; + isSeedBlock = true; + if (solver->verbose) { + printf("%d is Selected bt source \n",idx); + } + } + } else { + for(size_t i = 0; i < arrlen(solver->seeds); i++) { + if (solver->seeds[i][0] == x && + solver->seeds[i][1] == y && + solver->seeds[i][2] == z) { + solver->memory_struct.h_sol[idx] = 0; + isSeedBlock = true; + if (solver->verbose) { + printf("%d is Selected bt source \n",idx); + } + } + } + } + idx++; + } + } + } + /////////////////////////////////////////////// + if(isSeedBlock) { + if (solver->verbose) { + printf("%d,%d,%d is Seed Block \n",zStr,yStr,xStr); + } + solver->memory_struct.h_listVol[blk_idx] = true; + solver->memory_struct.h_listed[blk_idx] = true; + solver->memory_struct.h_list[list_idx] = blk_idx; + list_idx++; + nActiveBlock++; + } else { + solver->memory_struct.h_listVol[blk_idx] = false; + solver->memory_struct.h_listed[blk_idx] = false; + } + blk_idx++; + } + } + } + solver->memory_struct.nActiveBlock = nActiveBlock; + // initialize GPU memory with host memory + check_cuda_error( cudaMemcpy(solver->memory_struct.d_sol, solver->memory_struct.h_sol, volSize*sizeof(real), cudaMemcpyHostToDevice) ); + check_cuda_error( cudaMemcpy(solver->memory_struct.t_sol, solver->memory_struct.h_sol, volSize*sizeof(real), cudaMemcpyHostToDevice) ); + check_cuda_error( cudaMemcpy(solver->memory_struct.d_list, solver->memory_struct.h_list, nActiveBlock*sizeof(uint), cudaMemcpyHostToDevice) ); + check_cuda_error( cudaMemcpy(solver->memory_struct.d_listVol, solver->memory_struct.h_listVol, blockNum*sizeof(bool), cudaMemcpyHostToDevice) ); + // initialize GPU memory with constant value + check_cuda_error( cudaMemset(solver->memory_struct.d_con, 1, volSize*sizeof(bool)) ); +} + +void write_alg(struct eikonal_solver *solver, char *filename) { + + FILE *out = fopen(filename, "w"); + + if (out == NULL) { + log_error_and_exit("Error opening file: %s\n", filename); + } + + for (int k = 0; k < solver->depth; k++) { + for (int j = 0; j < solver->height; j++) { + for (int i = 0; i < solver->width; i++) { + if(solver->mask[i][j][k] == true) { + real d = solver->answer[i][j][k]; + fprintf(out, "%lf, %lf, %lf, 0.5, 0.5, 0.5, %lf\n", i + 0.5, j + 0.5, k + 0.5, d); + } + } + } + } + + fclose(out); + +} + +void solve_eikonal(struct eikonal_solver *solver) { + + if (solver->speeds == NULL) { + map_generator(solver); + } + + solver->cuda_mem_created = false; + initialization(solver); + use_seeds(solver); + run_eikonal_solver_simple(&solver->memory_struct, solver->verbose); + get_solution(solver); + +} diff --git a/src/eikonal/eikonal_solver.h b/src/eikonal/eikonal_solver.h new file mode 100644 index 00000000..bede96fd --- /dev/null +++ b/src/eikonal/eikonal_solver.h @@ -0,0 +1,26 @@ +#ifndef MONOALG3D_EIKONAL_SOLVER_H +#define MONOALG3D_EIKONAL_SOLVER_H + +#include "../common_types/common_types.h" +#include "common_def.h" +#include "../alg/cell/cell.h" + +struct eikonal_solver { + bool verbose; + bool cuda_mem_created; + size_t width, height, depth, num_active_cells; + size_t iters_per_block, solver_type; + real ***speeds; + real ***answer; + size_t **seeds; + bool *** mask; + struct cell_node **active_cells; + CUDAMEMSTRUCT memory_struct; +}; + +struct eikonal_solver * new_eikonal_solver(bool verbose); +void free_eikonal_solver(struct eikonal_solver *solver); +void solve_eikonal(struct eikonal_solver *solver); +void write_alg(struct eikonal_solver *solver, char *filename); + +#endif //MONOALG3D_EIKONAL_SOLVER_H diff --git a/src/logger/logger.h b/src/logger/logger.h index 58bf9da7..d56ca2f5 100644 --- a/src/logger/logger.h +++ b/src/logger/logger.h @@ -8,6 +8,8 @@ #include #include +#define LOG_LINE_SEPARATOR "======================================================================\n" + struct logt { FILE *fp; bool quiet; diff --git a/src/main_eikonal.c b/src/main_eikonal.c index 9d10e676..76dc7f7f 100644 --- a/src/main_eikonal.c +++ b/src/main_eikonal.c @@ -1,188 +1,43 @@ #include -#include +#include "alg/cell/cell.h" #include "alg/grid/grid.h" +#include "3dparty/ini_parser/ini.h" #include "config/domain_config.h" -#include "config/save_mesh_config.h" -#include "config/stim_config.h" -#include "utils/heap.h" +#include "eikonal/eikonal_solver.h" +#include "3dparty/stb_ds.h" #include "logger/logger.h" -#include "3dparty/ini_parser/ini.h" -#include "utils/file_utils.h" +#include "config/config_parser.h" -#define WAVE_SPEED 100.0 // um/ms //TODO: this should be a parameter -#define APD 400.0 //TODO: make this a parameter -#define REFRACTORY_PERIOD 200.0 //TODO: make this a parameter -struct heap_point_hash_entry { - struct point_3d key; - struct heap_point value; -}; +static int compare_coordinates(const void *a, const void *b) { -static struct point_3d compute_wave_speed(struct point_3d conductivity_tensor) { - //Compute wave speed based on conductivity tensor (e.g., using eigenvalues) - //Example: For isotropic conductivity, return sqrt(1 / conductivity_tensor) - //For anisotropic conductivity, compute wave speed based on eigenvalues - //return POINT3D(sqrt(conductivity_tensor.x), sqrt(conductivity_tensor.y), sqrt(conductivity_tensor.z)); - //return POINT3D(500.0, 500.0, 500.0); // um/ms - return SAME_POINT3D(WAVE_SPEED); -} + struct cell_node *coord1 = *(struct cell_node **) a; + struct cell_node *coord2 = *(struct cell_node **) b; -//TODO: add an hash map in the heap to make the search faster -static int in_heap(struct point_distance_heap *heap, struct heap_point x, int n) { - for(int i = 0; i < n; i++) { - struct cell_node *cell = x.grid_cell; - if(heap->arr[i].grid_cell->center.x == cell->center.x && heap->arr[i].grid_cell->center.y == cell->center.y && - heap->arr[i].grid_cell->center.z == cell->center.z) { - return i; - } + if (coord1->center.y != coord2->center.y) { + return coord1->center.y - coord2->center.y; } - return -1; -} - -static real euclidean_distance(struct cell_node *cell1, struct cell_node *cell2) { - - real dx = cell1->center.x - cell2->center.x; - real dy = cell1->center.y - cell2->center.y; - real dz = cell1->center.z - cell2->center.z; - - return sqrt(dx * dx + dy * dy + dz * dz); -} - -void compute_t(struct heap_point x, void *neighbour, struct heap_point_hash_entry *known, struct point_3d condutivity_tensor, struct point_distance_heap *heap, real *elapsed_time, real integrated_time, real dt) { - - void *neighbour_grid_cell = neighbour; - struct cell_node *grid_cell = x.grid_cell; - bool has_found; - - struct transition_node *white_neighbor_cell; - uint16_t neighbour_grid_cell_level = ((struct basic_cell_data *)(neighbour_grid_cell))->level; - enum cell_type neighbour_grid_cell_type = ((struct basic_cell_data *)(neighbour_grid_cell))->type; - - if(neighbour_grid_cell_level > grid_cell->cell_data.level) { - if(neighbour_grid_cell_type == TRANSITION_NODE) { - has_found = false; - while(!has_found) { - if(neighbour_grid_cell_type == TRANSITION_NODE) { - white_neighbor_cell = (struct transition_node *)neighbour_grid_cell; - if(white_neighbor_cell->single_connector == NULL) { - has_found = true; - } else { - neighbour_grid_cell = white_neighbor_cell->quadruple_connector1; - neighbour_grid_cell_type = ((struct basic_cell_data *)(neighbour_grid_cell))->type; - } - } else { - break; - } - } - } - } else { - if(neighbour_grid_cell_level <= grid_cell->cell_data.level && (neighbour_grid_cell_type == TRANSITION_NODE)) { - has_found = false; - while(!has_found) { - if(neighbour_grid_cell_type == TRANSITION_NODE) { - white_neighbor_cell = (struct transition_node *)(neighbour_grid_cell); - if(white_neighbor_cell->single_connector == 0) { - has_found = true; - } else { - neighbour_grid_cell = white_neighbor_cell->single_connector; - neighbour_grid_cell_type = ((struct basic_cell_data *)(neighbour_grid_cell))->type; - } - } else { - break; - } - } - } + if (coord1->center.x != coord2->center.x) { + return coord1->center.x - coord2->center.x; } - if(neighbour_grid_cell_type == CELL_NODE) { - struct cell_node *neighbour_cell = (struct cell_node *)(neighbour_grid_cell); - - struct heap_point tmp = hmget(known, neighbour_cell->center); + return coord1->center.z - coord2->center.z; - if(!neighbour_cell->active || tmp.grid_cell != NULL) { - return; - } - - struct heap_point xi; - xi.grid_cell = neighbour_cell; - - int index = in_heap(heap, xi, heap->size); - - real neighbour_time = INFINITY; - - real wave_speed = compute_wave_speed(condutivity_tensor).x; - - //TODO: we have to know the neighbour's direction to calculate the wave speed - real tentative_time = x.time + euclidean_distance(grid_cell, neighbour_grid_cell)/wave_speed; - - if(elapsed_time != NULL) { - *elapsed_time = tentative_time - integrated_time; - if(*elapsed_time > dt) { - heap_push(heap, x); - return; - } - } - - if(index != -1) { - neighbour_time = heap->arr[index].time; - - if(tentative_time < neighbour_time) { - heap->arr[index].time = tentative_time; - heap->arr[index].grid_cell->v = tentative_time; - } - - } else { - xi.time = tentative_time; - xi.grid_cell->v = tentative_time; - heap_push(heap, xi); - } - } } - -int main(int argc, char **argv) { +int main(int argc, char *argv[]) { struct eikonal_options *eikonal_options = new_eikonal_options(); parse_eikonal_options(argc, argv, eikonal_options); - struct grid *grid = new_grid(); - - if (ini_parse(eikonal_options->config_file, parse_eikonal_config_file, eikonal_options) < 0) { - log_error_and_exit("Error: Can't load the config file %s\n", eikonal_options->config_file); - } - - if(eikonal_options->dt_was_set == false) { - log_error_and_exit("The time step was not set! Exiting!\n"); - } + struct grid *grid = new_grid(); - if(eikonal_options->final_time_was_set == false) { - log_error_and_exit("The simulation final time was not set! Exiting!\n"); + if(ini_parse(eikonal_options->config_file, parse_eikonal_config_file, eikonal_options) < 0) { + log_error_and_exit("Error: Can't load the config file %s\n", eikonal_options->config_file); } struct config *domain_config = eikonal_options->domain_config; - struct config *save_mesh_config = eikonal_options->save_mesh_config; - struct string_voidp_hash_entry *stimuli_configs = eikonal_options->stim_configs; - - bool save_to_file = (save_mesh_config != NULL); - char *out_dir_name = NULL; - - if(save_to_file) { - init_config_functions(save_mesh_config, "./shared_libs/libdefault_save_mesh.so", "save_result"); - GET_PARAMETER_STRING_VALUE_OR_USE_DEFAULT(out_dir_name, save_mesh_config, "output_dir"); - - if(out_dir_name == NULL) { - log_error("No output directory provided to save the results! Exiting!\n"); - exit(EXIT_FAILURE); - } - - create_dir(out_dir_name); - - } else { - log_error("No configuration provided to save the results! Exiting!\n"); - free(out_dir_name); - exit(EXIT_FAILURE); - } // Configure the functions and set the mesh domain if(domain_config) { @@ -191,151 +46,51 @@ int main(int argc, char **argv) { print_domain_config_values(domain_config); log_msg(LOG_LINE_SEPARATOR); - + bool success = ((set_spatial_domain_fn *)eikonal_options->domain_config->main_function)(domain_config, grid); if(!success) { log_error_and_exit("Error configuring the tissue domain!\n"); - } - - order_grid_cells(grid); - } - - real simulation_time = eikonal_options->final_time; - real dt = eikonal_options->dt; - - struct point_3d condutivity_tensor = SAME_POINT3D(0.00005336); - - struct heap_point *data = (struct heap_point *) malloc(grid->num_active_cells * sizeof(struct heap_point)); - struct point_distance_heap *trial = build_heap(data, 0, grid->num_active_cells); - - struct heap_point_hash_entry *known = NULL; - struct heap_point default_entry = {NULL, -1}; - hmdefault(known, default_entry); - - struct heap_point_hash_entry *refractory = NULL; - hmdefault(refractory, default_entry); - - if(stimuli_configs) { - - size_t n = shlen(stimuli_configs); - struct time_info time_info = {0.0, 0.0, 0.0, 0}; - - if(n > 0) { - STIM_CONFIG_HASH_FOR_INIT_FUNCTIONS(stimuli_configs); } - for(int i = 0; i < n; i++) { - - struct string_voidp_hash_entry e = stimuli_configs[i]; - log_info("Stimulus name: %s\n", e.key); - struct config *tmp = (struct config *)e.value; - print_stim_config_values(tmp); - log_msg(LOG_LINE_SEPARATOR); - } - - set_spatial_stim(&time_info, stimuli_configs, grid, false); - - struct config *tmp = NULL; - - uint32_t n_active = grid->num_active_cells; - struct cell_node **ac = grid->active_cells; - - for(size_t k = 0; k < n; k++) { - - real stim_start = 0.0; - real stim_dur = 0.0; - real stim_period = 0.0; - - tmp = (struct config *)stimuli_configs[k].value; - GET_PARAMETER_NUMERIC_VALUE_OR_REPORT_ERROR(real, stim_start, tmp, "start"); - GET_PARAMETER_NUMERIC_VALUE_OR_REPORT_ERROR(real, stim_dur, tmp, "duration"); - GET_PARAMETER_NUMERIC_VALUE_OR_USE_DEFAULT(real, stim_period, tmp, "period"); - - for(uint32_t i = 0; i < n_active; i++) { - real value = ((real *)(tmp->persistent_data))[i]; - if(value != 0.0) { - struct heap_point node; - node.grid_cell = ac[i]; - node.time = 0.0; - node.grid_cell->v = node.time; - hmput(known, ac[i]->center, node); - } - } - - // if(stim_period > 0.0) { - // if(time >= stim_start + stim_period) { - // stim_start = stim_start + stim_period; - // sds stim_start_char = sdscatprintf(sdsempty(), "%lf", stim_start); - // shput_dup_value(tmp->config_data, "start", stim_start_char); - // sdsfree(stim_start_char); - // } - // } + order_grid_cells(grid); - // time = cur_time; - } + qsort(grid->active_cells, grid->num_active_cells, sizeof(grid->active_cells[0]), compare_coordinates); } - struct time_info ti = ZERO_TIME_INFO; - ti.dt = dt; - ti.final_t = simulation_time; - ti.iteration = 0; - - CALL_INIT_SAVE_MESH(save_mesh_config); + for(int i = 0 ; i < grid->num_active_cells; i++) { + int new_pos_x = (grid->active_cells[i]->center.x-grid->active_cells[i]->discretization.x/2)/grid->active_cells[i]->discretization.x; + int new_pos_y = (grid->active_cells[i]->center.y-grid->active_cells[i]->discretization.y/2)/grid->active_cells[i]->discretization.y; + int new_pos_z = (grid->active_cells[i]->center.z-grid->active_cells[i]->discretization.z/2)/grid->active_cells[i]->discretization.z; - ((save_mesh_fn *)save_mesh_config->main_function)(&ti, save_mesh_config, grid, NULL, NULL); - - for(int i = 0; i < hmlen(known); i++) { - struct heap_point x = known[i].value; - for (int j = 0; j <= LEFT; j++) { - compute_t(x, x.grid_cell->neighbours[j], known, condutivity_tensor, trial, NULL, 0.0, dt); - } + grid->active_cells[i]->center.x = new_pos_x; + grid->active_cells[i]->center.y = new_pos_y; + grid->active_cells[i]->center.z = new_pos_z; } - - real integrated_time = 0; - while(integrated_time < simulation_time) { - - real elapsed_time = 0.0; - ti.current_t += dt; - ti.iteration += 1; + size_t itersPerBlock = 10, type = 1; - while(trial->size > 0 && elapsed_time <= dt) { + struct eikonal_solver *solver = new_eikonal_solver(false); + solver->width = grid->cube_side_length.x/grid->active_cells[0]->discretization.x; + solver->height = grid->cube_side_length.y/grid->active_cells[0]->discretization.y; + solver->depth = grid->cube_side_length.z/grid->active_cells[0]->discretization.z; - struct heap_point x = heap_pop(trial); + solver->solver_type = type; + solver->iters_per_block = itersPerBlock; - hmput(known, x.grid_cell->center, x); + size_t *initial_seed = calloc(sizeof(size_t), 3); + initial_seed[0] = grid->active_cells[0]->center.x; + initial_seed[1] = grid->active_cells[0]->center.y; + initial_seed[2] = grid->active_cells[0]->center.z; - for (int i = 0; i <= LEFT; i++) { - compute_t(x, x.grid_cell->neighbours[i], known, condutivity_tensor, trial, &elapsed_time, integrated_time, dt); - } - - for(int i = 0; i < hmlen(known); i++) { - struct heap_point x = known[i].value; - if(elapsed_time - x.time > APD) { - hmdel(known, x.grid_cell->center); - x.repolarization_time = x.time + APD; - hmput(refractory, x.grid_cell->center, x); - } - } + arrput(solver->seeds, initial_seed); + solver->active_cells = grid->active_cells; + solver->num_active_cells = grid->num_active_cells; - for(int i = 0; i < hmlen(refractory); i++) { - struct heap_point x = refractory[i].value; - if(elapsed_time - x.repolarization_time > REFRACTORY_PERIOD) { - hmdel(refractory, x.grid_cell->center); - } - } - - } - - integrated_time += dt; + solve_eikonal(solver); + write_alg(solver, "test.alg"); - ((save_mesh_fn *)save_mesh_config->main_function)(&ti, save_mesh_config, grid, NULL, NULL); - - } - - CALL_END_SAVE_MESH(save_mesh_config, grid); - free_config_data(save_mesh_config); + free_eikonal_solver(solver); } diff --git a/src/utils/build.sh b/src/utils/build.sh index 6de5bfaa..cdfea18b 100755 --- a/src/utils/build.sh +++ b/src/utils/build.sh @@ -1,4 +1,4 @@ -UTILS_SOURCE_FILES="search.c stop_watch.c sort.c file_utils.c batch_utils.c heap.c" -UTILS_HEADER_FILES="utils.h stop_watch.h file_utils.h batch_utils.c heap.h" +UTILS_SOURCE_FILES="search.c stop_watch.c sort.c file_utils.c batch_utils.c" +UTILS_HEADER_FILES="utils.h stop_watch.h file_utils.h batch_utils.c" COMPILE_STATIC_LIB "utils" "$UTILS_SOURCE_FILES" "$UTILS_HEADER_FILES" diff --git a/src/utils/file_utils.h b/src/utils/file_utils.h index d9f3e986..44f1ba36 100644 --- a/src/utils/file_utils.h +++ b/src/utils/file_utils.h @@ -5,8 +5,6 @@ #ifndef MONOALG3D_FILE_UTILS_H #define MONOALG3D_FILE_UTILS_H -#define LOG_LINE_SEPARATOR "======================================================================\n" - #include #include #include diff --git a/src/utils/heap.c b/src/utils/heap.c deleted file mode 100644 index 0e41df24..00000000 --- a/src/utils/heap.c +++ /dev/null @@ -1,90 +0,0 @@ -#include -#include -#include "heap.h" - -static void heapify(struct point_distance_heap* h, int index) { - int left = index * 2 + 1; - int right = index * 2 + 2; - int min = index; - - if (left >= h->size || left < 0) - left = -1; - if (right >= h->size || right < 0) - right = -1; - - if (left != -1 && h->arr[left].time < h->arr[index].time) - min = left; - if (right != -1 && h->arr[right].time < h->arr[min].time) - min = right; - - if (min != index) { - struct heap_point temp = h->arr[min]; - h->arr[min] = h->arr[index]; - h->arr[index] = temp; - heapify(h, min); - } -} - -struct point_distance_heap * build_heap(struct heap_point* data, int size, int capacity) { - - struct point_distance_heap* h = (struct point_distance_heap *) malloc(sizeof(struct point_distance_heap)); - if (h == NULL) { - fprintf(stderr, "Error allocating memory for heap\n"); - return NULL; - } - - h->capacity = capacity; - h->size = size; - - h->arr = data; - - int i = (h->size - 2) / 2; - while (i >= 0) { - heapify(h, i); - i--; - } - return h; -} - -void insert_helper(struct point_distance_heap* h, int index) { - - int parent = (index - 1) / 2; - - if (h->arr[parent].time > h->arr[index].time) { - struct heap_point temp = h->arr[parent]; - h->arr[parent] = h->arr[index]; - h->arr[index] = temp; - insert_helper(h, parent); - } -} - -struct heap_point heap_pop(struct point_distance_heap* h) { - - struct heap_point delete_item = {NULL, -1}; - - if (h->size == 0) { - printf("\nHeap id empty."); - return delete_item; - } - - delete_item = h->arr[0]; - - h->arr[0] = h->arr[h->size - 1]; - h->size--; - - heapify(h, 0); - return delete_item; -} - -void heap_push(struct point_distance_heap* h, struct heap_point data) { - - // Checking if heap is full or not - if (h->size < h->capacity) { - // Inserting data into an array - h->arr[h->size] = data; - // Calling insertHelper function - insert_helper(h, h->size); - // Incrementing size of array - h->size++; - } -} diff --git a/src/utils/heap.h b/src/utils/heap.h deleted file mode 100644 index 320b4195..00000000 --- a/src/utils/heap.h +++ /dev/null @@ -1,18 +0,0 @@ - -#include "../common_types/common_types.h" - -struct heap_point { - struct cell_node *grid_cell; - real time; - real repolarization_time; -}; - -struct point_distance_heap { - struct heap_point * arr; - int size; - int capacity; -}; - -struct point_distance_heap * build_heap(struct heap_point* data, int size, int capacity); -struct heap_point heap_pop(struct point_distance_heap* h); -void heap_push(struct point_distance_heap *h, struct heap_point data);